{'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:12:23.592187', 'step': 0, 'epoch': 0} {'type': 'pplx', 'content': 21944.183071258598, 'timestamp': '2025-09-30 22:12:23.611134', 'step': 0, 'epoch': 0} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:23.693982', 'step': 0, 'epoch': 1} {'type': 'loss', 'content': 1.0141518115997314, 'timestamp': '2025-09-30 22:12:23.698425', 'step': 1, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:23.733215', 'step': 1, 'epoch': 1} {'type': 'loss', 'content': 1.0238981246948242, 'timestamp': '2025-09-30 22:12:23.738526', 'step': 2, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:23.782811', 'step': 2, 'epoch': 1} {'type': 'loss', 'content': 1.0061830282211304, 'timestamp': '2025-09-30 22:12:23.788371', 'step': 3, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:23.827108', 'step': 3, 'epoch': 1} {'type': 'loss', 'content': 0.976143479347229, 'timestamp': '2025-09-30 22:12:23.898554', 'step': 4, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:23.932963', 'step': 4, 'epoch': 1} {'type': 'loss', 'content': 0.3519705533981323, 'timestamp': '2025-09-30 22:12:23.938136', 'step': 5, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:23.976816', 'step': 5, 'epoch': 1} {'type': 'loss', 'content': 0.29692506790161133, 'timestamp': '2025-09-30 22:12:23.981212', 'step': 6, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.012127', 'step': 6, 'epoch': 1} {'type': 'loss', 'content': 0.33580321073532104, 'timestamp': '2025-09-30 22:12:24.015385', 'step': 7, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.048289', 'step': 7, 'epoch': 1} {'type': 'loss', 'content': 0.318448930978775, 'timestamp': '2025-09-30 22:12:24.074010', 'step': 8, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.105182', 'step': 8, 'epoch': 1} {'type': 'loss', 'content': 0.18996313214302063, 'timestamp': '2025-09-30 22:12:24.111803', 'step': 9, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.147121', 'step': 9, 'epoch': 1} {'type': 'loss', 'content': 0.23508897423744202, 'timestamp': '2025-09-30 22:12:24.154362', 'step': 10, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.186497', 'step': 10, 'epoch': 1} {'type': 'loss', 'content': 0.1312141716480255, 'timestamp': '2025-09-30 22:12:24.192611', 'step': 11, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.226160', 'step': 11, 'epoch': 1} {'type': 'loss', 'content': 0.26553139090538025, 'timestamp': '2025-09-30 22:12:24.252877', 'step': 12, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.287546', 'step': 12, 'epoch': 1} {'type': 'loss', 'content': 0.16226935386657715, 'timestamp': '2025-09-30 22:12:24.293241', 'step': 13, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:24.325149', 'step': 13, 'epoch': 1} {'type': 'loss', 'content': 0.1756121963262558, 'timestamp': '2025-09-30 22:12:24.333118', 'step': 14, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.368570', 'step': 14, 'epoch': 1} {'type': 'loss', 'content': 0.1800687164068222, 'timestamp': '2025-09-30 22:12:24.373633', 'step': 15, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:24.404806', 'step': 15, 'epoch': 1} {'type': 'loss', 'content': 0.2871936857700348, 'timestamp': '2025-09-30 22:12:24.434239', 'step': 16, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.468557', 'step': 16, 'epoch': 1} {'type': 'loss', 'content': 0.1686691790819168, 'timestamp': '2025-09-30 22:12:24.475055', 'step': 17, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.509958', 'step': 17, 'epoch': 1} {'type': 'loss', 'content': 0.24103666841983795, 'timestamp': '2025-09-30 22:12:24.515821', 'step': 18, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:24.568137', 'step': 18, 'epoch': 1} {'type': 'loss', 'content': 0.3027922511100769, 'timestamp': '2025-09-30 22:12:24.575016', 'step': 19, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.609914', 'step': 19, 'epoch': 1} {'type': 'loss', 'content': 0.14118297398090363, 'timestamp': '2025-09-30 22:12:24.637065', 'step': 20, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.670153', 'step': 20, 'epoch': 1} {'type': 'loss', 'content': 0.30251896381378174, 'timestamp': '2025-09-30 22:12:24.676556', 'step': 21, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:24.707833', 'step': 21, 'epoch': 1} {'type': 'loss', 'content': 0.27288100123405457, 'timestamp': '2025-09-30 22:12:24.712827', 'step': 22, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:24.747312', 'step': 22, 'epoch': 1} {'type': 'loss', 'content': 0.13012588024139404, 'timestamp': '2025-09-30 22:12:24.754480', 'step': 23, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:24.790869', 'step': 23, 'epoch': 1} {'type': 'loss', 'content': 0.3108130693435669, 'timestamp': '2025-09-30 22:12:24.816418', 'step': 24, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.846449', 'step': 24, 'epoch': 1} {'type': 'loss', 'content': 0.21147465705871582, 'timestamp': '2025-09-30 22:12:24.851537', 'step': 25, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:24.894432', 'step': 25, 'epoch': 1} {'type': 'loss', 'content': 0.20987200736999512, 'timestamp': '2025-09-30 22:12:24.896437', 'step': 26, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:24.926386', 'step': 26, 'epoch': 1} {'type': 'loss', 'content': 0.3759233355522156, 'timestamp': '2025-09-30 22:12:24.929514', 'step': 27, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:24.962930', 'step': 27, 'epoch': 1} {'type': 'loss', 'content': 0.2687840759754181, 'timestamp': '2025-09-30 22:12:24.987122', 'step': 28, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:25.018340', 'step': 28, 'epoch': 1} {'type': 'loss', 'content': 0.17089782655239105, 'timestamp': '2025-09-30 22:12:25.020609', 'step': 29, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:25.050757', 'step': 29, 'epoch': 1} {'type': 'loss', 'content': 0.11952826380729675, 'timestamp': '2025-09-30 22:12:25.057952', 'step': 30, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.089260', 'step': 30, 'epoch': 1} {'type': 'loss', 'content': 0.21258053183555603, 'timestamp': '2025-09-30 22:12:25.091290', 'step': 31, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:25.126307', 'step': 31, 'epoch': 1} {'type': 'loss', 'content': 0.16641150414943695, 'timestamp': '2025-09-30 22:12:25.150450', 'step': 32, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.184132', 'step': 32, 'epoch': 1} {'type': 'loss', 'content': 0.2449382245540619, 'timestamp': '2025-09-30 22:12:25.186414', 'step': 33, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.216618', 'step': 33, 'epoch': 1} {'type': 'loss', 'content': 0.25585293769836426, 'timestamp': '2025-09-30 22:12:25.218784', 'step': 34, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:25.252263', 'step': 34, 'epoch': 1} {'type': 'loss', 'content': 0.3356994390487671, 'timestamp': '2025-09-30 22:12:25.256691', 'step': 35, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:25.296285', 'step': 35, 'epoch': 1} {'type': 'loss', 'content': 0.17795582115650177, 'timestamp': '2025-09-30 22:12:25.325207', 'step': 36, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.360356', 'step': 36, 'epoch': 1} {'type': 'loss', 'content': 0.32906419038772583, 'timestamp': '2025-09-30 22:12:25.366233', 'step': 37, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:25.399842', 'step': 37, 'epoch': 1} {'type': 'loss', 'content': 0.19516344368457794, 'timestamp': '2025-09-30 22:12:25.406544', 'step': 38, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:25.442836', 'step': 38, 'epoch': 1} {'type': 'loss', 'content': 0.3113977611064911, 'timestamp': '2025-09-30 22:12:25.445948', 'step': 39, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.476822', 'step': 39, 'epoch': 1} {'type': 'loss', 'content': 0.2701047658920288, 'timestamp': '2025-09-30 22:12:25.501073', 'step': 40, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:25.534762', 'step': 40, 'epoch': 1} {'type': 'loss', 'content': 0.1617232859134674, 'timestamp': '2025-09-30 22:12:25.541710', 'step': 41, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.574805', 'step': 41, 'epoch': 1} {'type': 'loss', 'content': 0.19348984956741333, 'timestamp': '2025-09-30 22:12:25.581062', 'step': 42, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.611987', 'step': 42, 'epoch': 1} {'type': 'loss', 'content': 0.17057806253433228, 'timestamp': '2025-09-30 22:12:25.618260', 'step': 43, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.652629', 'step': 43, 'epoch': 1} {'type': 'loss', 'content': 0.2595454454421997, 'timestamp': '2025-09-30 22:12:25.679534', 'step': 44, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.716157', 'step': 44, 'epoch': 1} {'type': 'loss', 'content': 0.24335919320583344, 'timestamp': '2025-09-30 22:12:25.720456', 'step': 45, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:25.756380', 'step': 45, 'epoch': 1} {'type': 'loss', 'content': 0.2559993863105774, 'timestamp': '2025-09-30 22:12:25.761708', 'step': 46, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:25.803440', 'step': 46, 'epoch': 1} {'type': 'loss', 'content': 0.2078448086977005, 'timestamp': '2025-09-30 22:12:25.806138', 'step': 47, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.836383', 'step': 47, 'epoch': 1} {'type': 'loss', 'content': 0.3196363151073456, 'timestamp': '2025-09-30 22:12:25.860893', 'step': 48, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:25.891267', 'step': 48, 'epoch': 1} {'type': 'loss', 'content': 0.1824292242527008, 'timestamp': '2025-09-30 22:12:25.895501', 'step': 49, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.926613', 'step': 49, 'epoch': 1} {'type': 'loss', 'content': 0.19426241517066956, 'timestamp': '2025-09-30 22:12:25.929241', 'step': 50, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:25.961546', 'step': 50, 'epoch': 1} {'type': 'loss', 'content': 0.24494697153568268, 'timestamp': '2025-09-30 22:12:25.963630', 'step': 51, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:25.997746', 'step': 51, 'epoch': 1} {'type': 'loss', 'content': 0.3315775394439697, 'timestamp': '2025-09-30 22:12:26.027056', 'step': 52, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:26.059812', 'step': 52, 'epoch': 1} {'type': 'loss', 'content': 0.19703251123428345, 'timestamp': '2025-09-30 22:12:26.067475', 'step': 53, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:26.098820', 'step': 53, 'epoch': 1} {'type': 'loss', 'content': 0.2087213546037674, 'timestamp': '2025-09-30 22:12:26.102128', 'step': 54, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:26.133213', 'step': 54, 'epoch': 1} {'type': 'loss', 'content': 0.24082030355930328, 'timestamp': '2025-09-30 22:12:26.136226', 'step': 55, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:26.183266', 'step': 55, 'epoch': 1} {'type': 'loss', 'content': 0.10759132355451584, 'timestamp': '2025-09-30 22:12:26.212749', 'step': 56, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:26.247136', 'step': 56, 'epoch': 1} {'type': 'loss', 'content': 0.17894507944583893, 'timestamp': '2025-09-30 22:12:26.250249', 'step': 57, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.280652', 'step': 57, 'epoch': 1} {'type': 'loss', 'content': 0.2080574482679367, 'timestamp': '2025-09-30 22:12:26.284848', 'step': 58, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:26.315403', 'step': 58, 'epoch': 1} {'type': 'loss', 'content': 0.2275456339120865, 'timestamp': '2025-09-30 22:12:26.318528', 'step': 59, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:26.357096', 'step': 59, 'epoch': 1} {'type': 'loss', 'content': 0.16882026195526123, 'timestamp': '2025-09-30 22:12:26.382203', 'step': 60, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:26.412921', 'step': 60, 'epoch': 1} {'type': 'loss', 'content': 0.20243602991104126, 'timestamp': '2025-09-30 22:12:26.416480', 'step': 61, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:26.451763', 'step': 61, 'epoch': 1} {'type': 'loss', 'content': 0.14168578386306763, 'timestamp': '2025-09-30 22:12:26.454519', 'step': 62, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:26.485090', 'step': 62, 'epoch': 1} {'type': 'loss', 'content': 0.22573652863502502, 'timestamp': '2025-09-30 22:12:26.488942', 'step': 63, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.525367', 'step': 63, 'epoch': 1} {'type': 'loss', 'content': 0.25557464361190796, 'timestamp': '2025-09-30 22:12:26.550306', 'step': 64, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:26.584937', 'step': 64, 'epoch': 1} {'type': 'loss', 'content': 0.1330631971359253, 'timestamp': '2025-09-30 22:12:26.588951', 'step': 65, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:26.625230', 'step': 65, 'epoch': 1} {'type': 'loss', 'content': 0.25941404700279236, 'timestamp': '2025-09-30 22:12:26.631238', 'step': 66, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:26.662624', 'step': 66, 'epoch': 1} {'type': 'loss', 'content': 0.29442957043647766, 'timestamp': '2025-09-30 22:12:26.669631', 'step': 67, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.700651', 'step': 67, 'epoch': 1} {'type': 'loss', 'content': 0.20189352333545685, 'timestamp': '2025-09-30 22:12:26.730439', 'step': 68, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:26.761374', 'step': 68, 'epoch': 1} {'type': 'loss', 'content': 0.23629234731197357, 'timestamp': '2025-09-30 22:12:26.769365', 'step': 69, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.805799', 'step': 69, 'epoch': 1} {'type': 'loss', 'content': 0.2542906701564789, 'timestamp': '2025-09-30 22:12:26.813049', 'step': 70, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.846116', 'step': 70, 'epoch': 1} {'type': 'loss', 'content': 0.16322019696235657, 'timestamp': '2025-09-30 22:12:26.849889', 'step': 71, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.888846', 'step': 71, 'epoch': 1} {'type': 'loss', 'content': 0.20134733617305756, 'timestamp': '2025-09-30 22:12:26.915364', 'step': 72, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:26.947485', 'step': 72, 'epoch': 1} {'type': 'loss', 'content': 0.19501763582229614, 'timestamp': '2025-09-30 22:12:26.953551', 'step': 73, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:26.984856', 'step': 73, 'epoch': 1} {'type': 'loss', 'content': 0.19847339391708374, 'timestamp': '2025-09-30 22:12:26.987863', 'step': 74, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:27.017890', 'step': 74, 'epoch': 1} {'type': 'loss', 'content': 0.1855669468641281, 'timestamp': '2025-09-30 22:12:27.024301', 'step': 75, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.055981', 'step': 75, 'epoch': 1} {'type': 'loss', 'content': 0.3313688039779663, 'timestamp': '2025-09-30 22:12:27.080181', 'step': 76, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:27.114415', 'step': 76, 'epoch': 1} {'type': 'loss', 'content': 0.16967719793319702, 'timestamp': '2025-09-30 22:12:27.117764', 'step': 77, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.148314', 'step': 77, 'epoch': 1} {'type': 'loss', 'content': 0.21101659536361694, 'timestamp': '2025-09-30 22:12:27.152043', 'step': 78, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.187915', 'step': 78, 'epoch': 1} {'type': 'loss', 'content': 0.169295534491539, 'timestamp': '2025-09-30 22:12:27.190827', 'step': 79, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.224019', 'step': 79, 'epoch': 1} {'type': 'loss', 'content': 0.2678598165512085, 'timestamp': '2025-09-30 22:12:27.249045', 'step': 80, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:27.284932', 'step': 80, 'epoch': 1} {'type': 'loss', 'content': 0.22564418613910675, 'timestamp': '2025-09-30 22:12:27.288058', 'step': 81, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.321293', 'step': 81, 'epoch': 1} {'type': 'loss', 'content': 0.19700773060321808, 'timestamp': '2025-09-30 22:12:27.324340', 'step': 82, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:27.360899', 'step': 82, 'epoch': 1} {'type': 'loss', 'content': 0.1398516297340393, 'timestamp': '2025-09-30 22:12:27.367863', 'step': 83, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.404568', 'step': 83, 'epoch': 1} {'type': 'loss', 'content': 0.2393532246351242, 'timestamp': '2025-09-30 22:12:27.431220', 'step': 84, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.461845', 'step': 84, 'epoch': 1} {'type': 'loss', 'content': 0.18680119514465332, 'timestamp': '2025-09-30 22:12:27.466542', 'step': 85, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:27.498670', 'step': 85, 'epoch': 1} {'type': 'loss', 'content': 0.2006261646747589, 'timestamp': '2025-09-30 22:12:27.501991', 'step': 86, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.537184', 'step': 86, 'epoch': 1} {'type': 'loss', 'content': 0.20874693989753723, 'timestamp': '2025-09-30 22:12:27.545419', 'step': 87, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:27.576932', 'step': 87, 'epoch': 1} {'type': 'loss', 'content': 0.22447073459625244, 'timestamp': '2025-09-30 22:12:27.601027', 'step': 88, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.631853', 'step': 88, 'epoch': 1} {'type': 'loss', 'content': 0.15151748061180115, 'timestamp': '2025-09-30 22:12:27.637981', 'step': 89, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.671376', 'step': 89, 'epoch': 1} {'type': 'loss', 'content': 0.2416563332080841, 'timestamp': '2025-09-30 22:12:27.673465', 'step': 90, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.705305', 'step': 90, 'epoch': 1} {'type': 'loss', 'content': 0.18020948767662048, 'timestamp': '2025-09-30 22:12:27.710835', 'step': 91, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.744301', 'step': 91, 'epoch': 1} {'type': 'loss', 'content': 0.16493192315101624, 'timestamp': '2025-09-30 22:12:27.770407', 'step': 92, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.801360', 'step': 92, 'epoch': 1} {'type': 'loss', 'content': 0.24230845272541046, 'timestamp': '2025-09-30 22:12:27.804570', 'step': 93, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:27.834810', 'step': 93, 'epoch': 1} {'type': 'loss', 'content': 0.21955330669879913, 'timestamp': '2025-09-30 22:12:27.838075', 'step': 94, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:27.867684', 'step': 94, 'epoch': 1} {'type': 'loss', 'content': 0.17033526301383972, 'timestamp': '2025-09-30 22:12:27.872704', 'step': 95, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:27.904251', 'step': 95, 'epoch': 1} {'type': 'loss', 'content': 0.21352766454219818, 'timestamp': '2025-09-30 22:12:27.928424', 'step': 96, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.958538', 'step': 96, 'epoch': 1} {'type': 'loss', 'content': 0.20051194727420807, 'timestamp': '2025-09-30 22:12:27.962989', 'step': 97, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:27.992394', 'step': 97, 'epoch': 1} {'type': 'loss', 'content': 0.22929754853248596, 'timestamp': '2025-09-30 22:12:27.996939', 'step': 98, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:28.028522', 'step': 98, 'epoch': 1} {'type': 'loss', 'content': 0.1596464067697525, 'timestamp': '2025-09-30 22:12:28.031522', 'step': 99, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.061467', 'step': 99, 'epoch': 1} {'type': 'loss', 'content': 0.241256445646286, 'timestamp': '2025-09-30 22:12:28.086841', 'step': 100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:28.120128', 'step': 100, 'epoch': 1} {'type': 'loss', 'content': 0.2426563799381256, 'timestamp': '2025-09-30 22:12:28.132949', 'step': 101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.175200', 'step': 101, 'epoch': 1} {'type': 'loss', 'content': 0.24106037616729736, 'timestamp': '2025-09-30 22:12:28.179063', 'step': 102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:28.209103', 'step': 102, 'epoch': 1} {'type': 'loss', 'content': 0.331752747297287, 'timestamp': '2025-09-30 22:12:28.212115', 'step': 103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.244021', 'step': 103, 'epoch': 1} {'type': 'loss', 'content': 0.21137656271457672, 'timestamp': '2025-09-30 22:12:28.273229', 'step': 104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.301959', 'step': 104, 'epoch': 1} {'type': 'loss', 'content': 0.17782379686832428, 'timestamp': '2025-09-30 22:12:28.306701', 'step': 105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.337229', 'step': 105, 'epoch': 1} {'type': 'loss', 'content': 0.2419508695602417, 'timestamp': '2025-09-30 22:12:28.341609', 'step': 106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:28.372048', 'step': 106, 'epoch': 1} {'type': 'loss', 'content': 0.336931973695755, 'timestamp': '2025-09-30 22:12:28.380463', 'step': 107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.415022', 'step': 107, 'epoch': 1} {'type': 'loss', 'content': 0.32855474948883057, 'timestamp': '2025-09-30 22:12:28.438703', 'step': 108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:28.468576', 'step': 108, 'epoch': 1} {'type': 'loss', 'content': 0.19936908781528473, 'timestamp': '2025-09-30 22:12:28.470332', 'step': 109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.499781', 'step': 109, 'epoch': 1} {'type': 'loss', 'content': 0.2731262445449829, 'timestamp': '2025-09-30 22:12:28.502023', 'step': 110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.530896', 'step': 110, 'epoch': 1} {'type': 'loss', 'content': 0.15417678654193878, 'timestamp': '2025-09-30 22:12:28.533004', 'step': 111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.571523', 'step': 111, 'epoch': 1} {'type': 'loss', 'content': 0.16667774319648743, 'timestamp': '2025-09-30 22:12:28.595451', 'step': 112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.625035', 'step': 112, 'epoch': 1} {'type': 'loss', 'content': 0.20493417978286743, 'timestamp': '2025-09-30 22:12:28.629100', 'step': 113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.664855', 'step': 113, 'epoch': 1} {'type': 'loss', 'content': 0.3324694037437439, 'timestamp': '2025-09-30 22:12:28.667256', 'step': 114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:28.697285', 'step': 114, 'epoch': 1} {'type': 'loss', 'content': 0.1747734099626541, 'timestamp': '2025-09-30 22:12:28.712132', 'step': 115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.751884', 'step': 115, 'epoch': 1} {'type': 'loss', 'content': 0.2602540850639343, 'timestamp': '2025-09-30 22:12:28.776714', 'step': 116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.806786', 'step': 116, 'epoch': 1} {'type': 'loss', 'content': 0.19310329854488373, 'timestamp': '2025-09-30 22:12:28.810272', 'step': 117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.842188', 'step': 117, 'epoch': 1} {'type': 'loss', 'content': 0.21487262845039368, 'timestamp': '2025-09-30 22:12:28.846044', 'step': 118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:28.883484', 'step': 118, 'epoch': 1} {'type': 'loss', 'content': 0.23102399706840515, 'timestamp': '2025-09-30 22:12:28.894166', 'step': 119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:28.929318', 'step': 119, 'epoch': 1} {'type': 'loss', 'content': 0.26671233773231506, 'timestamp': '2025-09-30 22:12:28.956211', 'step': 120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:28.993532', 'step': 120, 'epoch': 1} {'type': 'loss', 'content': 0.24205079674720764, 'timestamp': '2025-09-30 22:12:28.995860', 'step': 121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:29.029974', 'step': 121, 'epoch': 1} {'type': 'loss', 'content': 0.18530039489269257, 'timestamp': '2025-09-30 22:12:29.033160', 'step': 122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:29.064767', 'step': 122, 'epoch': 1} {'type': 'loss', 'content': 0.2864021360874176, 'timestamp': '2025-09-30 22:12:29.067383', 'step': 123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:29.097145', 'step': 123, 'epoch': 1} {'type': 'loss', 'content': 0.18357302248477936, 'timestamp': '2025-09-30 22:12:29.122120', 'step': 124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.152170', 'step': 124, 'epoch': 1} {'type': 'loss', 'content': 0.2224341630935669, 'timestamp': '2025-09-30 22:12:29.154450', 'step': 125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.186839', 'step': 125, 'epoch': 1} {'type': 'loss', 'content': 0.173418328166008, 'timestamp': '2025-09-30 22:12:29.189179', 'step': 126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.219194', 'step': 126, 'epoch': 1} {'type': 'loss', 'content': 0.20812702178955078, 'timestamp': '2025-09-30 22:12:29.221107', 'step': 127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.250625', 'step': 127, 'epoch': 1} {'type': 'loss', 'content': 0.37556466460227966, 'timestamp': '2025-09-30 22:12:29.274765', 'step': 128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:29.305261', 'step': 128, 'epoch': 1} {'type': 'loss', 'content': 0.21204793453216553, 'timestamp': '2025-09-30 22:12:29.307483', 'step': 129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.339420', 'step': 129, 'epoch': 1} {'type': 'loss', 'content': 0.19529938697814941, 'timestamp': '2025-09-30 22:12:29.342348', 'step': 130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.373137', 'step': 130, 'epoch': 1} {'type': 'loss', 'content': 0.14312972128391266, 'timestamp': '2025-09-30 22:12:29.381359', 'step': 131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.417859', 'step': 131, 'epoch': 1} {'type': 'loss', 'content': 0.2027161866426468, 'timestamp': '2025-09-30 22:12:29.442975', 'step': 132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.492195', 'step': 132, 'epoch': 1} {'type': 'loss', 'content': 0.25213515758514404, 'timestamp': '2025-09-30 22:12:29.495084', 'step': 133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.525820', 'step': 133, 'epoch': 1} {'type': 'loss', 'content': 0.3055337071418762, 'timestamp': '2025-09-30 22:12:29.528284', 'step': 134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.564219', 'step': 134, 'epoch': 1} {'type': 'loss', 'content': 0.16642311215400696, 'timestamp': '2025-09-30 22:12:29.569787', 'step': 135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:29.603872', 'step': 135, 'epoch': 1} {'type': 'loss', 'content': 0.19619596004486084, 'timestamp': '2025-09-30 22:12:29.633473', 'step': 136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.665447', 'step': 136, 'epoch': 1} {'type': 'loss', 'content': 0.2823658585548401, 'timestamp': '2025-09-30 22:12:29.673469', 'step': 137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.708831', 'step': 137, 'epoch': 1} {'type': 'loss', 'content': 0.21800567209720612, 'timestamp': '2025-09-30 22:12:29.712083', 'step': 138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.750694', 'step': 138, 'epoch': 1} {'type': 'loss', 'content': 0.178096204996109, 'timestamp': '2025-09-30 22:12:29.753146', 'step': 139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:29.786456', 'step': 139, 'epoch': 1} {'type': 'loss', 'content': 0.3022322356700897, 'timestamp': '2025-09-30 22:12:29.812070', 'step': 140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.846212', 'step': 140, 'epoch': 1} {'type': 'loss', 'content': 0.21843716502189636, 'timestamp': '2025-09-30 22:12:29.848467', 'step': 141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:29.883082', 'step': 141, 'epoch': 1} {'type': 'loss', 'content': 0.19217249751091003, 'timestamp': '2025-09-30 22:12:29.888137', 'step': 142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:29.923127', 'step': 142, 'epoch': 1} {'type': 'loss', 'content': 0.21812860667705536, 'timestamp': '2025-09-30 22:12:29.927393', 'step': 143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:29.961959', 'step': 143, 'epoch': 1} {'type': 'loss', 'content': 0.24701954424381256, 'timestamp': '2025-09-30 22:12:29.986062', 'step': 144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:30.019167', 'step': 144, 'epoch': 1} {'type': 'loss', 'content': 0.1708715707063675, 'timestamp': '2025-09-30 22:12:30.025843', 'step': 145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:30.058715', 'step': 145, 'epoch': 1} {'type': 'loss', 'content': 0.2755560874938965, 'timestamp': '2025-09-30 22:12:30.061260', 'step': 146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.095747', 'step': 146, 'epoch': 1} {'type': 'loss', 'content': 0.24137064814567566, 'timestamp': '2025-09-30 22:12:30.098409', 'step': 147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.128560', 'step': 147, 'epoch': 1} {'type': 'loss', 'content': 0.1995687186717987, 'timestamp': '2025-09-30 22:12:30.153196', 'step': 148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:30.183694', 'step': 148, 'epoch': 1} {'type': 'loss', 'content': 0.12591280043125153, 'timestamp': '2025-09-30 22:12:30.190950', 'step': 149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:30.225018', 'step': 149, 'epoch': 1} {'type': 'loss', 'content': 0.21968258917331696, 'timestamp': '2025-09-30 22:12:30.230555', 'step': 150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:30.264638', 'step': 150, 'epoch': 1} {'type': 'loss', 'content': 0.15458029508590698, 'timestamp': '2025-09-30 22:12:30.270087', 'step': 151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:30.304642', 'step': 151, 'epoch': 1} {'type': 'loss', 'content': 0.17414362728595734, 'timestamp': '2025-09-30 22:12:30.329426', 'step': 152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:30.365158', 'step': 152, 'epoch': 1} {'type': 'loss', 'content': 0.2395186722278595, 'timestamp': '2025-09-30 22:12:30.368813', 'step': 153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:30.399852', 'step': 153, 'epoch': 1} {'type': 'loss', 'content': 0.24329061806201935, 'timestamp': '2025-09-30 22:12:30.403498', 'step': 154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:12:30.450848', 'step': 154, 'epoch': 1} {'type': 'loss', 'content': 0.3351372480392456, 'timestamp': '2025-09-30 22:12:30.458716', 'step': 155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.490791', 'step': 155, 'epoch': 1} {'type': 'loss', 'content': 0.24035309255123138, 'timestamp': '2025-09-30 22:12:30.519124', 'step': 156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:30.554838', 'step': 156, 'epoch': 1} {'type': 'loss', 'content': 0.1587892323732376, 'timestamp': '2025-09-30 22:12:30.561392', 'step': 157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.592151', 'step': 157, 'epoch': 1} {'type': 'loss', 'content': 0.2288503646850586, 'timestamp': '2025-09-30 22:12:30.594767', 'step': 158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.628881', 'step': 158, 'epoch': 1} {'type': 'loss', 'content': 0.16624340415000916, 'timestamp': '2025-09-30 22:12:30.631498', 'step': 159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.663053', 'step': 159, 'epoch': 1} {'type': 'loss', 'content': 0.20748713612556458, 'timestamp': '2025-09-30 22:12:30.687444', 'step': 160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:30.717468', 'step': 160, 'epoch': 1} {'type': 'loss', 'content': 0.22200942039489746, 'timestamp': '2025-09-30 22:12:30.723087', 'step': 161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:30.753820', 'step': 161, 'epoch': 1} {'type': 'loss', 'content': 0.3166126310825348, 'timestamp': '2025-09-30 22:12:30.760798', 'step': 162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:30.793604', 'step': 162, 'epoch': 1} {'type': 'loss', 'content': 0.20742186903953552, 'timestamp': '2025-09-30 22:12:30.796542', 'step': 163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:30.826646', 'step': 163, 'epoch': 1} {'type': 'loss', 'content': 0.1768728643655777, 'timestamp': '2025-09-30 22:12:30.856820', 'step': 164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:30.887028', 'step': 164, 'epoch': 1} {'type': 'loss', 'content': 0.27447807788848877, 'timestamp': '2025-09-30 22:12:30.889722', 'step': 165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:30.920844', 'step': 165, 'epoch': 1} {'type': 'loss', 'content': 0.1672525852918625, 'timestamp': '2025-09-30 22:12:30.923854', 'step': 166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:30.953570', 'step': 166, 'epoch': 1} {'type': 'loss', 'content': 0.11867125332355499, 'timestamp': '2025-09-30 22:12:30.960818', 'step': 167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:30.991992', 'step': 167, 'epoch': 1} {'type': 'loss', 'content': 0.1971505582332611, 'timestamp': '2025-09-30 22:12:31.020662', 'step': 168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.053711', 'step': 168, 'epoch': 1} {'type': 'loss', 'content': 0.1461939960718155, 'timestamp': '2025-09-30 22:12:31.057123', 'step': 169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:31.088294', 'step': 169, 'epoch': 1} {'type': 'loss', 'content': 0.18815356492996216, 'timestamp': '2025-09-30 22:12:31.091392', 'step': 170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:31.124569', 'step': 170, 'epoch': 1} {'type': 'loss', 'content': 0.15266594290733337, 'timestamp': '2025-09-30 22:12:31.129405', 'step': 171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:31.160660', 'step': 171, 'epoch': 1} {'type': 'loss', 'content': 0.2904774844646454, 'timestamp': '2025-09-30 22:12:31.185722', 'step': 172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:31.223514', 'step': 172, 'epoch': 1} {'type': 'loss', 'content': 0.26233452558517456, 'timestamp': '2025-09-30 22:12:31.226253', 'step': 173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:31.262918', 'step': 173, 'epoch': 1} {'type': 'loss', 'content': 0.22782431542873383, 'timestamp': '2025-09-30 22:12:31.266394', 'step': 174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.297412', 'step': 174, 'epoch': 1} {'type': 'loss', 'content': 0.2802799940109253, 'timestamp': '2025-09-30 22:12:31.301577', 'step': 175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.332637', 'step': 175, 'epoch': 1} {'type': 'loss', 'content': 0.25582462549209595, 'timestamp': '2025-09-30 22:12:31.358351', 'step': 176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:31.389393', 'step': 176, 'epoch': 1} {'type': 'loss', 'content': 0.2556842863559723, 'timestamp': '2025-09-30 22:12:31.400581', 'step': 177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:31.442395', 'step': 177, 'epoch': 1} {'type': 'loss', 'content': 0.1560559719800949, 'timestamp': '2025-09-30 22:12:31.450179', 'step': 178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.480978', 'step': 178, 'epoch': 1} {'type': 'loss', 'content': 0.2352103292942047, 'timestamp': '2025-09-30 22:12:31.495896', 'step': 179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:31.536439', 'step': 179, 'epoch': 1} {'type': 'loss', 'content': 0.18563207983970642, 'timestamp': '2025-09-30 22:12:31.572396', 'step': 180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:31.612020', 'step': 180, 'epoch': 1} {'type': 'loss', 'content': 0.25445741415023804, 'timestamp': '2025-09-30 22:12:31.614407', 'step': 181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.649075', 'step': 181, 'epoch': 1} {'type': 'loss', 'content': 0.1765289306640625, 'timestamp': '2025-09-30 22:12:31.658147', 'step': 182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.688373', 'step': 182, 'epoch': 1} {'type': 'loss', 'content': 0.21252788603305817, 'timestamp': '2025-09-30 22:12:31.690485', 'step': 183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.722096', 'step': 183, 'epoch': 1} {'type': 'loss', 'content': 0.17178334295749664, 'timestamp': '2025-09-30 22:12:31.745956', 'step': 184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:31.777334', 'step': 184, 'epoch': 1} {'type': 'loss', 'content': 0.1971403807401657, 'timestamp': '2025-09-30 22:12:31.780444', 'step': 185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:31.816411', 'step': 185, 'epoch': 1} {'type': 'loss', 'content': 0.14861834049224854, 'timestamp': '2025-09-30 22:12:31.823787', 'step': 186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:31.856580', 'step': 186, 'epoch': 1} {'type': 'loss', 'content': 0.1696929782629013, 'timestamp': '2025-09-30 22:12:31.859322', 'step': 187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:31.892828', 'step': 187, 'epoch': 1} {'type': 'loss', 'content': 0.2067308872938156, 'timestamp': '2025-09-30 22:12:31.916228', 'step': 188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:12:31.949668', 'step': 188, 'epoch': 1} {'type': 'loss', 'content': 0.25713014602661133, 'timestamp': '2025-09-30 22:12:31.952654', 'step': 189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:31.983906', 'step': 189, 'epoch': 1} {'type': 'loss', 'content': 0.3368283212184906, 'timestamp': '2025-09-30 22:12:31.990417', 'step': 190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.027243', 'step': 190, 'epoch': 1} {'type': 'loss', 'content': 0.15055201947689056, 'timestamp': '2025-09-30 22:12:32.034016', 'step': 191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.069191', 'step': 191, 'epoch': 1} {'type': 'loss', 'content': 0.33823755383491516, 'timestamp': '2025-09-30 22:12:32.097114', 'step': 192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.128186', 'step': 192, 'epoch': 1} {'type': 'loss', 'content': 0.22585850954055786, 'timestamp': '2025-09-30 22:12:32.131512', 'step': 193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:32.163983', 'step': 193, 'epoch': 1} {'type': 'loss', 'content': 0.24439218640327454, 'timestamp': '2025-09-30 22:12:32.166295', 'step': 194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:32.197689', 'step': 194, 'epoch': 1} {'type': 'loss', 'content': 0.19130977988243103, 'timestamp': '2025-09-30 22:12:32.200392', 'step': 195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.230647', 'step': 195, 'epoch': 1} {'type': 'loss', 'content': 0.22252759337425232, 'timestamp': '2025-09-30 22:12:32.255139', 'step': 196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.284917', 'step': 196, 'epoch': 1} {'type': 'loss', 'content': 0.2056071013212204, 'timestamp': '2025-09-30 22:12:32.288128', 'step': 197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.327275', 'step': 197, 'epoch': 1} {'type': 'loss', 'content': 0.3373158276081085, 'timestamp': '2025-09-30 22:12:32.329654', 'step': 198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.359445', 'step': 198, 'epoch': 1} {'type': 'loss', 'content': 0.13107351958751678, 'timestamp': '2025-09-30 22:12:32.369746', 'step': 199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:32.410794', 'step': 199, 'epoch': 1} {'type': 'loss', 'content': 0.12861809134483337, 'timestamp': '2025-09-30 22:12:32.436838', 'step': 200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:32.468451', 'step': 200, 'epoch': 1} {'type': 'loss', 'content': 0.3499002754688263, 'timestamp': '2025-09-30 22:12:32.471683', 'step': 201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:32.503086', 'step': 201, 'epoch': 1} {'type': 'loss', 'content': 0.18444688618183136, 'timestamp': '2025-09-30 22:12:32.507067', 'step': 202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:32.538804', 'step': 202, 'epoch': 1} {'type': 'loss', 'content': 0.14198069274425507, 'timestamp': '2025-09-30 22:12:32.545202', 'step': 203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.582278', 'step': 203, 'epoch': 1} {'type': 'loss', 'content': 0.24266281723976135, 'timestamp': '2025-09-30 22:12:32.606201', 'step': 204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.642067', 'step': 204, 'epoch': 1} {'type': 'loss', 'content': 0.3227069079875946, 'timestamp': '2025-09-30 22:12:32.645203', 'step': 205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.675445', 'step': 205, 'epoch': 1} {'type': 'loss', 'content': 0.16262656450271606, 'timestamp': '2025-09-30 22:12:32.679703', 'step': 206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:32.714665', 'step': 206, 'epoch': 1} {'type': 'loss', 'content': 0.18501007556915283, 'timestamp': '2025-09-30 22:12:32.721786', 'step': 207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.757905', 'step': 207, 'epoch': 1} {'type': 'loss', 'content': 0.1713544726371765, 'timestamp': '2025-09-30 22:12:32.782033', 'step': 208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.813572', 'step': 208, 'epoch': 1} {'type': 'loss', 'content': 0.19929316639900208, 'timestamp': '2025-09-30 22:12:32.817276', 'step': 209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.852274', 'step': 209, 'epoch': 1} {'type': 'loss', 'content': 0.3341671824455261, 'timestamp': '2025-09-30 22:12:32.855055', 'step': 210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:32.888483', 'step': 210, 'epoch': 1} {'type': 'loss', 'content': 0.1703384518623352, 'timestamp': '2025-09-30 22:12:32.891271', 'step': 211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.926104', 'step': 211, 'epoch': 1} {'type': 'loss', 'content': 0.25355735421180725, 'timestamp': '2025-09-30 22:12:32.950663', 'step': 212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:32.986179', 'step': 212, 'epoch': 1} {'type': 'loss', 'content': 0.15627121925354004, 'timestamp': '2025-09-30 22:12:32.991347', 'step': 213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.022895', 'step': 213, 'epoch': 1} {'type': 'loss', 'content': 0.28310325741767883, 'timestamp': '2025-09-30 22:12:33.029377', 'step': 214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.061509', 'step': 214, 'epoch': 1} {'type': 'loss', 'content': 0.17132028937339783, 'timestamp': '2025-09-30 22:12:33.065090', 'step': 215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.097132', 'step': 215, 'epoch': 1} {'type': 'loss', 'content': 0.32120630145072937, 'timestamp': '2025-09-30 22:12:33.122461', 'step': 216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.157476', 'step': 216, 'epoch': 1} {'type': 'loss', 'content': 0.21154744923114777, 'timestamp': '2025-09-30 22:12:33.163922', 'step': 217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.194718', 'step': 217, 'epoch': 1} {'type': 'loss', 'content': 0.20563653111457825, 'timestamp': '2025-09-30 22:12:33.197814', 'step': 218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.228045', 'step': 218, 'epoch': 1} {'type': 'loss', 'content': 0.32245543599128723, 'timestamp': '2025-09-30 22:12:33.233370', 'step': 219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.267659', 'step': 219, 'epoch': 1} {'type': 'loss', 'content': 0.2608300447463989, 'timestamp': '2025-09-30 22:12:33.295408', 'step': 220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.326649', 'step': 220, 'epoch': 1} {'type': 'loss', 'content': 0.2031194418668747, 'timestamp': '2025-09-30 22:12:33.329713', 'step': 221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.361395', 'step': 221, 'epoch': 1} {'type': 'loss', 'content': 0.1262713074684143, 'timestamp': '2025-09-30 22:12:33.363604', 'step': 222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.397582', 'step': 222, 'epoch': 1} {'type': 'loss', 'content': 0.2203020304441452, 'timestamp': '2025-09-30 22:12:33.400812', 'step': 223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.430521', 'step': 223, 'epoch': 1} {'type': 'loss', 'content': 0.1871601790189743, 'timestamp': '2025-09-30 22:12:33.454275', 'step': 224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.484985', 'step': 224, 'epoch': 1} {'type': 'loss', 'content': 0.17148299515247345, 'timestamp': '2025-09-30 22:12:33.487600', 'step': 225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.518852', 'step': 225, 'epoch': 1} {'type': 'loss', 'content': 0.2346402257680893, 'timestamp': '2025-09-30 22:12:33.528204', 'step': 226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.562831', 'step': 226, 'epoch': 1} {'type': 'loss', 'content': 0.22225214540958405, 'timestamp': '2025-09-30 22:12:33.565452', 'step': 227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:33.595988', 'step': 227, 'epoch': 1} {'type': 'loss', 'content': 0.2076287567615509, 'timestamp': '2025-09-30 22:12:33.623750', 'step': 228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.654457', 'step': 228, 'epoch': 1} {'type': 'loss', 'content': 0.1995013952255249, 'timestamp': '2025-09-30 22:12:33.657729', 'step': 229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:33.699855', 'step': 229, 'epoch': 1} {'type': 'loss', 'content': 0.2618333697319031, 'timestamp': '2025-09-30 22:12:33.703130', 'step': 230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.735048', 'step': 230, 'epoch': 1} {'type': 'loss', 'content': 0.1770174205303192, 'timestamp': '2025-09-30 22:12:33.738589', 'step': 231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.768368', 'step': 231, 'epoch': 1} {'type': 'loss', 'content': 0.2862670421600342, 'timestamp': '2025-09-30 22:12:33.792933', 'step': 232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:33.823593', 'step': 232, 'epoch': 1} {'type': 'loss', 'content': 0.2326173186302185, 'timestamp': '2025-09-30 22:12:33.826901', 'step': 233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:33.858047', 'step': 233, 'epoch': 1} {'type': 'loss', 'content': 0.1887691766023636, 'timestamp': '2025-09-30 22:12:33.860788', 'step': 234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:33.891366', 'step': 234, 'epoch': 1} {'type': 'loss', 'content': 0.24945694208145142, 'timestamp': '2025-09-30 22:12:33.894272', 'step': 235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:33.924561', 'step': 235, 'epoch': 1} {'type': 'loss', 'content': 0.1368529349565506, 'timestamp': '2025-09-30 22:12:33.948851', 'step': 236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:33.979977', 'step': 236, 'epoch': 1} {'type': 'loss', 'content': 0.2553618848323822, 'timestamp': '2025-09-30 22:12:33.983220', 'step': 237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.013697', 'step': 237, 'epoch': 1} {'type': 'loss', 'content': 0.2751067280769348, 'timestamp': '2025-09-30 22:12:34.016394', 'step': 238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.046308', 'step': 238, 'epoch': 1} {'type': 'loss', 'content': 0.17098835110664368, 'timestamp': '2025-09-30 22:12:34.049095', 'step': 239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:34.079437', 'step': 239, 'epoch': 1} {'type': 'loss', 'content': 0.20328401029109955, 'timestamp': '2025-09-30 22:12:34.103137', 'step': 240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.134771', 'step': 240, 'epoch': 1} {'type': 'loss', 'content': 0.2272823005914688, 'timestamp': '2025-09-30 22:12:34.138120', 'step': 241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.169714', 'step': 241, 'epoch': 1} {'type': 'loss', 'content': 0.21298319101333618, 'timestamp': '2025-09-30 22:12:34.176834', 'step': 242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.210590', 'step': 242, 'epoch': 1} {'type': 'loss', 'content': 0.36585527658462524, 'timestamp': '2025-09-30 22:12:34.215589', 'step': 243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:34.246682', 'step': 243, 'epoch': 1} {'type': 'loss', 'content': 0.20944944024085999, 'timestamp': '2025-09-30 22:12:34.272190', 'step': 244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.306668', 'step': 244, 'epoch': 1} {'type': 'loss', 'content': 0.2852195203304291, 'timestamp': '2025-09-30 22:12:34.309846', 'step': 245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.343682', 'step': 245, 'epoch': 1} {'type': 'loss', 'content': 0.18859732151031494, 'timestamp': '2025-09-30 22:12:34.350929', 'step': 246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.381216', 'step': 246, 'epoch': 1} {'type': 'loss', 'content': 0.14888668060302734, 'timestamp': '2025-09-30 22:12:34.390026', 'step': 247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.421102', 'step': 247, 'epoch': 1} {'type': 'loss', 'content': 0.11400340497493744, 'timestamp': '2025-09-30 22:12:34.446198', 'step': 248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:34.477246', 'step': 248, 'epoch': 1} {'type': 'loss', 'content': 0.24689102172851562, 'timestamp': '2025-09-30 22:12:34.479683', 'step': 249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:34.510418', 'step': 249, 'epoch': 1} {'type': 'loss', 'content': 0.19756893813610077, 'timestamp': '2025-09-30 22:12:34.513297', 'step': 250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.548759', 'step': 250, 'epoch': 1} {'type': 'loss', 'content': 0.19745862483978271, 'timestamp': '2025-09-30 22:12:34.552687', 'step': 251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.587911', 'step': 251, 'epoch': 1} {'type': 'loss', 'content': 0.1984383910894394, 'timestamp': '2025-09-30 22:12:34.613372', 'step': 252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.647197', 'step': 252, 'epoch': 1} {'type': 'loss', 'content': 0.2345982939004898, 'timestamp': '2025-09-30 22:12:34.656864', 'step': 253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.687151', 'step': 253, 'epoch': 1} {'type': 'loss', 'content': 0.18396590650081635, 'timestamp': '2025-09-30 22:12:34.689840', 'step': 254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.727336', 'step': 254, 'epoch': 1} {'type': 'loss', 'content': 0.12691006064414978, 'timestamp': '2025-09-30 22:12:34.734030', 'step': 255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.768500', 'step': 255, 'epoch': 1} {'type': 'loss', 'content': 0.13110549747943878, 'timestamp': '2025-09-30 22:12:34.792431', 'step': 256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:34.822913', 'step': 256, 'epoch': 1} {'type': 'loss', 'content': 0.2512013912200928, 'timestamp': '2025-09-30 22:12:34.825636', 'step': 257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.856562', 'step': 257, 'epoch': 1} {'type': 'loss', 'content': 0.14638139307498932, 'timestamp': '2025-09-30 22:12:34.859826', 'step': 258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:34.898129', 'step': 258, 'epoch': 1} {'type': 'loss', 'content': 0.12336121499538422, 'timestamp': '2025-09-30 22:12:34.901228', 'step': 259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.933006', 'step': 259, 'epoch': 1} {'type': 'loss', 'content': 0.24549169838428497, 'timestamp': '2025-09-30 22:12:34.957957', 'step': 260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:34.988570', 'step': 260, 'epoch': 1} {'type': 'loss', 'content': 0.16617660224437714, 'timestamp': '2025-09-30 22:12:34.990723', 'step': 261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.020563', 'step': 261, 'epoch': 1} {'type': 'loss', 'content': 0.1305750161409378, 'timestamp': '2025-09-30 22:12:35.022806', 'step': 262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.055152', 'step': 262, 'epoch': 1} {'type': 'loss', 'content': 0.21464727818965912, 'timestamp': '2025-09-30 22:12:35.061992', 'step': 263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:12:35.114040', 'step': 263, 'epoch': 1} {'type': 'loss', 'content': 0.2459826022386551, 'timestamp': '2025-09-30 22:12:35.142060', 'step': 264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.175101', 'step': 264, 'epoch': 1} {'type': 'loss', 'content': 0.22126570343971252, 'timestamp': '2025-09-30 22:12:35.177811', 'step': 265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.213417', 'step': 265, 'epoch': 1} {'type': 'loss', 'content': 0.13566167652606964, 'timestamp': '2025-09-30 22:12:35.216278', 'step': 266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.250281', 'step': 266, 'epoch': 1} {'type': 'loss', 'content': 0.14007841050624847, 'timestamp': '2025-09-30 22:12:35.252677', 'step': 267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:35.286516', 'step': 267, 'epoch': 1} {'type': 'loss', 'content': 0.30323073267936707, 'timestamp': '2025-09-30 22:12:35.312050', 'step': 268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.342753', 'step': 268, 'epoch': 1} {'type': 'loss', 'content': 0.18608640134334564, 'timestamp': '2025-09-30 22:12:35.349897', 'step': 269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:35.380423', 'step': 269, 'epoch': 1} {'type': 'loss', 'content': 0.14753571152687073, 'timestamp': '2025-09-30 22:12:35.383464', 'step': 270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:35.418651', 'step': 270, 'epoch': 1} {'type': 'loss', 'content': 0.25617632269859314, 'timestamp': '2025-09-30 22:12:35.421515', 'step': 271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.452843', 'step': 271, 'epoch': 1} {'type': 'loss', 'content': 0.2336702197790146, 'timestamp': '2025-09-30 22:12:35.477618', 'step': 272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:35.507402', 'step': 272, 'epoch': 1} {'type': 'loss', 'content': 0.14789006114006042, 'timestamp': '2025-09-30 22:12:35.511437', 'step': 273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.547768', 'step': 273, 'epoch': 1} {'type': 'loss', 'content': 0.20165012776851654, 'timestamp': '2025-09-30 22:12:35.551386', 'step': 274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:35.582546', 'step': 274, 'epoch': 1} {'type': 'loss', 'content': 0.1961856335401535, 'timestamp': '2025-09-30 22:12:35.587541', 'step': 275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.619737', 'step': 275, 'epoch': 1} {'type': 'loss', 'content': 0.15636086463928223, 'timestamp': '2025-09-30 22:12:35.644877', 'step': 276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.676745', 'step': 276, 'epoch': 1} {'type': 'loss', 'content': 0.14000818133354187, 'timestamp': '2025-09-30 22:12:35.681003', 'step': 277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.711838', 'step': 277, 'epoch': 1} {'type': 'loss', 'content': 0.26056599617004395, 'timestamp': '2025-09-30 22:12:35.715522', 'step': 278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.750667', 'step': 278, 'epoch': 1} {'type': 'loss', 'content': 0.18314751982688904, 'timestamp': '2025-09-30 22:12:35.756815', 'step': 279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.787541', 'step': 279, 'epoch': 1} {'type': 'loss', 'content': 0.22026947140693665, 'timestamp': '2025-09-30 22:12:35.816104', 'step': 280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:35.853434', 'step': 280, 'epoch': 1} {'type': 'loss', 'content': 0.17403437197208405, 'timestamp': '2025-09-30 22:12:35.858908', 'step': 281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:35.897036', 'step': 281, 'epoch': 1} {'type': 'loss', 'content': 0.18883927166461945, 'timestamp': '2025-09-30 22:12:35.900400', 'step': 282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:35.931459', 'step': 282, 'epoch': 1} {'type': 'loss', 'content': 0.18662303686141968, 'timestamp': '2025-09-30 22:12:35.935663', 'step': 283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:35.966755', 'step': 283, 'epoch': 1} {'type': 'loss', 'content': 0.15654437243938446, 'timestamp': '2025-09-30 22:12:35.993244', 'step': 284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:36.026567', 'step': 284, 'epoch': 1} {'type': 'loss', 'content': 0.21858464181423187, 'timestamp': '2025-09-30 22:12:36.043594', 'step': 285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.078881', 'step': 285, 'epoch': 1} {'type': 'loss', 'content': 0.2831774950027466, 'timestamp': '2025-09-30 22:12:36.086860', 'step': 286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:36.121002', 'step': 286, 'epoch': 1} {'type': 'loss', 'content': 0.32450756430625916, 'timestamp': '2025-09-30 22:12:36.126428', 'step': 287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:36.171320', 'step': 287, 'epoch': 1} {'type': 'loss', 'content': 0.32115456461906433, 'timestamp': '2025-09-30 22:12:36.198586', 'step': 288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:36.231257', 'step': 288, 'epoch': 1} {'type': 'loss', 'content': 0.2788095772266388, 'timestamp': '2025-09-30 22:12:36.235545', 'step': 289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:36.266880', 'step': 289, 'epoch': 1} {'type': 'loss', 'content': 0.1539044976234436, 'timestamp': '2025-09-30 22:12:36.270212', 'step': 290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:36.300822', 'step': 290, 'epoch': 1} {'type': 'loss', 'content': 0.15912963449954987, 'timestamp': '2025-09-30 22:12:36.302949', 'step': 291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:36.334259', 'step': 291, 'epoch': 1} {'type': 'loss', 'content': 0.28374969959259033, 'timestamp': '2025-09-30 22:12:36.358866', 'step': 292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:36.392701', 'step': 292, 'epoch': 1} {'type': 'loss', 'content': 0.3342394232749939, 'timestamp': '2025-09-30 22:12:36.396824', 'step': 293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.426880', 'step': 293, 'epoch': 1} {'type': 'loss', 'content': 0.1962951123714447, 'timestamp': '2025-09-30 22:12:36.431409', 'step': 294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:36.462787', 'step': 294, 'epoch': 1} {'type': 'loss', 'content': 0.1895981878042221, 'timestamp': '2025-09-30 22:12:36.474341', 'step': 295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.504112', 'step': 295, 'epoch': 1} {'type': 'loss', 'content': 0.1701125204563141, 'timestamp': '2025-09-30 22:12:36.528022', 'step': 296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:36.560962', 'step': 296, 'epoch': 1} {'type': 'loss', 'content': 0.166366845369339, 'timestamp': '2025-09-30 22:12:36.562904', 'step': 297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.598189', 'step': 297, 'epoch': 1} {'type': 'loss', 'content': 0.13623793423175812, 'timestamp': '2025-09-30 22:12:36.601931', 'step': 298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:36.632016', 'step': 298, 'epoch': 1} {'type': 'loss', 'content': 0.2317434549331665, 'timestamp': '2025-09-30 22:12:36.636321', 'step': 299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.668993', 'step': 299, 'epoch': 1} {'type': 'loss', 'content': 0.21330475807189941, 'timestamp': '2025-09-30 22:12:36.698300', 'step': 300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.730144', 'step': 300, 'epoch': 1} {'type': 'loss', 'content': 0.28717365860939026, 'timestamp': '2025-09-30 22:12:36.732329', 'step': 301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:36.762390', 'step': 301, 'epoch': 1} {'type': 'loss', 'content': 0.2308218777179718, 'timestamp': '2025-09-30 22:12:36.764933', 'step': 302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:36.795354', 'step': 302, 'epoch': 1} {'type': 'loss', 'content': 0.20969714224338531, 'timestamp': '2025-09-30 22:12:36.798260', 'step': 303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.828832', 'step': 303, 'epoch': 1} {'type': 'loss', 'content': 0.18555022776126862, 'timestamp': '2025-09-30 22:12:36.853350', 'step': 304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-30 22:12:36.904259', 'step': 304, 'epoch': 1} {'type': 'loss', 'content': 0.21681083738803864, 'timestamp': '2025-09-30 22:12:36.915297', 'step': 305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:36.947783', 'step': 305, 'epoch': 1} {'type': 'loss', 'content': 0.2838076949119568, 'timestamp': '2025-09-30 22:12:36.953286', 'step': 306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:36.983127', 'step': 306, 'epoch': 1} {'type': 'loss', 'content': 0.2397305965423584, 'timestamp': '2025-09-30 22:12:36.985112', 'step': 307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.015199', 'step': 307, 'epoch': 1} {'type': 'loss', 'content': 0.16854487359523773, 'timestamp': '2025-09-30 22:12:37.038972', 'step': 308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.069588', 'step': 308, 'epoch': 1} {'type': 'loss', 'content': 0.1638893336057663, 'timestamp': '2025-09-30 22:12:37.071673', 'step': 309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:37.102177', 'step': 309, 'epoch': 1} {'type': 'loss', 'content': 0.21170814335346222, 'timestamp': '2025-09-30 22:12:37.105552', 'step': 310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.136784', 'step': 310, 'epoch': 1} {'type': 'loss', 'content': 0.20960471034049988, 'timestamp': '2025-09-30 22:12:37.139175', 'step': 311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:37.174541', 'step': 311, 'epoch': 1} {'type': 'loss', 'content': 0.17089442908763885, 'timestamp': '2025-09-30 22:12:37.198138', 'step': 312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.230738', 'step': 312, 'epoch': 1} {'type': 'loss', 'content': 0.12919281423091888, 'timestamp': '2025-09-30 22:12:37.232765', 'step': 313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.262416', 'step': 313, 'epoch': 1} {'type': 'loss', 'content': 0.20389412343502045, 'timestamp': '2025-09-30 22:12:37.264804', 'step': 314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.298368', 'step': 314, 'epoch': 1} {'type': 'loss', 'content': 0.26271164417266846, 'timestamp': '2025-09-30 22:12:37.302198', 'step': 315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.333112', 'step': 315, 'epoch': 1} {'type': 'loss', 'content': 0.14681516587734222, 'timestamp': '2025-09-30 22:12:37.357034', 'step': 316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:37.389352', 'step': 316, 'epoch': 1} {'type': 'loss', 'content': 0.1822192668914795, 'timestamp': '2025-09-30 22:12:37.392893', 'step': 317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:37.424632', 'step': 317, 'epoch': 1} {'type': 'loss', 'content': 0.13477766513824463, 'timestamp': '2025-09-30 22:12:37.431526', 'step': 318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.465402', 'step': 318, 'epoch': 1} {'type': 'loss', 'content': 0.20504625141620636, 'timestamp': '2025-09-30 22:12:37.467896', 'step': 319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.499531', 'step': 319, 'epoch': 1} {'type': 'loss', 'content': 0.24711214005947113, 'timestamp': '2025-09-30 22:12:37.524467', 'step': 320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:37.554712', 'step': 320, 'epoch': 1} {'type': 'loss', 'content': 0.21844276785850525, 'timestamp': '2025-09-30 22:12:37.557882', 'step': 321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.588512', 'step': 321, 'epoch': 1} {'type': 'loss', 'content': 0.17568258941173553, 'timestamp': '2025-09-30 22:12:37.592253', 'step': 322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.623199', 'step': 322, 'epoch': 1} {'type': 'loss', 'content': 0.1913284808397293, 'timestamp': '2025-09-30 22:12:37.626391', 'step': 323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.665405', 'step': 323, 'epoch': 1} {'type': 'loss', 'content': 0.1666167825460434, 'timestamp': '2025-09-30 22:12:37.695066', 'step': 324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:37.730547', 'step': 324, 'epoch': 1} {'type': 'loss', 'content': 0.1569962203502655, 'timestamp': '2025-09-30 22:12:37.733796', 'step': 325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.766155', 'step': 325, 'epoch': 1} {'type': 'loss', 'content': 0.2687571346759796, 'timestamp': '2025-09-30 22:12:37.772274', 'step': 326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.806567', 'step': 326, 'epoch': 1} {'type': 'loss', 'content': 0.11855714023113251, 'timestamp': '2025-09-30 22:12:37.809674', 'step': 327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:37.840800', 'step': 327, 'epoch': 1} {'type': 'loss', 'content': 0.23309573531150818, 'timestamp': '2025-09-30 22:12:37.869775', 'step': 328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:37.901469', 'step': 328, 'epoch': 1} {'type': 'loss', 'content': 0.26525336503982544, 'timestamp': '2025-09-30 22:12:37.904119', 'step': 329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:37.936141', 'step': 329, 'epoch': 1} {'type': 'loss', 'content': 0.1448935717344284, 'timestamp': '2025-09-30 22:12:37.938661', 'step': 330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:37.970692', 'step': 330, 'epoch': 1} {'type': 'loss', 'content': 0.19032429158687592, 'timestamp': '2025-09-30 22:12:37.978534', 'step': 331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.016238', 'step': 331, 'epoch': 1} {'type': 'loss', 'content': 0.24716807901859283, 'timestamp': '2025-09-30 22:12:38.042269', 'step': 332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.079696', 'step': 332, 'epoch': 1} {'type': 'loss', 'content': 0.14271514117717743, 'timestamp': '2025-09-30 22:12:38.093035', 'step': 333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.124218', 'step': 333, 'epoch': 1} {'type': 'loss', 'content': 0.27267441153526306, 'timestamp': '2025-09-30 22:12:38.129264', 'step': 334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.161263', 'step': 334, 'epoch': 1} {'type': 'loss', 'content': 0.11793342977762222, 'timestamp': '2025-09-30 22:12:38.169532', 'step': 335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:38.206262', 'step': 335, 'epoch': 1} {'type': 'loss', 'content': 0.19382411241531372, 'timestamp': '2025-09-30 22:12:38.230349', 'step': 336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.263309', 'step': 336, 'epoch': 1} {'type': 'loss', 'content': 0.20427565276622772, 'timestamp': '2025-09-30 22:12:38.269501', 'step': 337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:38.304367', 'step': 337, 'epoch': 1} {'type': 'loss', 'content': 0.16805399954319, 'timestamp': '2025-09-30 22:12:38.307811', 'step': 338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:38.338693', 'step': 338, 'epoch': 1} {'type': 'loss', 'content': 0.29042863845825195, 'timestamp': '2025-09-30 22:12:38.341154', 'step': 339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.373729', 'step': 339, 'epoch': 1} {'type': 'loss', 'content': 0.2570033073425293, 'timestamp': '2025-09-30 22:12:38.397769', 'step': 340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:38.428618', 'step': 340, 'epoch': 1} {'type': 'loss', 'content': 0.21855278313159943, 'timestamp': '2025-09-30 22:12:38.432464', 'step': 341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:38.462551', 'step': 341, 'epoch': 1} {'type': 'loss', 'content': 0.16440656781196594, 'timestamp': '2025-09-30 22:12:38.465403', 'step': 342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.495452', 'step': 342, 'epoch': 1} {'type': 'loss', 'content': 0.1784868687391281, 'timestamp': '2025-09-30 22:12:38.497481', 'step': 343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.527930', 'step': 343, 'epoch': 1} {'type': 'loss', 'content': 0.15461938083171844, 'timestamp': '2025-09-30 22:12:38.551771', 'step': 344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:38.589281', 'step': 344, 'epoch': 1} {'type': 'loss', 'content': 0.2744407057762146, 'timestamp': '2025-09-30 22:12:38.593784', 'step': 345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.625754', 'step': 345, 'epoch': 1} {'type': 'loss', 'content': 0.23044362664222717, 'timestamp': '2025-09-30 22:12:38.630530', 'step': 346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.663272', 'step': 346, 'epoch': 1} {'type': 'loss', 'content': 0.2658284306526184, 'timestamp': '2025-09-30 22:12:38.665986', 'step': 347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.699235', 'step': 347, 'epoch': 1} {'type': 'loss', 'content': 0.20766402781009674, 'timestamp': '2025-09-30 22:12:38.725752', 'step': 348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.756120', 'step': 348, 'epoch': 1} {'type': 'loss', 'content': 0.18925420939922333, 'timestamp': '2025-09-30 22:12:38.760948', 'step': 349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:38.796094', 'step': 349, 'epoch': 1} {'type': 'loss', 'content': 0.16099868714809418, 'timestamp': '2025-09-30 22:12:38.798942', 'step': 350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:38.831868', 'step': 350, 'epoch': 1} {'type': 'loss', 'content': 0.17792843282222748, 'timestamp': '2025-09-30 22:12:38.836274', 'step': 351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:38.872576', 'step': 351, 'epoch': 1} {'type': 'loss', 'content': 0.18870532512664795, 'timestamp': '2025-09-30 22:12:38.899197', 'step': 352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:38.930531', 'step': 352, 'epoch': 1} {'type': 'loss', 'content': 0.12844309210777283, 'timestamp': '2025-09-30 22:12:38.933060', 'step': 353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:38.963680', 'step': 353, 'epoch': 1} {'type': 'loss', 'content': 0.11020556837320328, 'timestamp': '2025-09-30 22:12:38.970127', 'step': 354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.001227', 'step': 354, 'epoch': 1} {'type': 'loss', 'content': 0.2060793787240982, 'timestamp': '2025-09-30 22:12:39.004384', 'step': 355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:39.037920', 'step': 355, 'epoch': 1} {'type': 'loss', 'content': 0.3579075336456299, 'timestamp': '2025-09-30 22:12:39.062120', 'step': 356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:39.093424', 'step': 356, 'epoch': 1} {'type': 'loss', 'content': 0.20722779631614685, 'timestamp': '2025-09-30 22:12:39.098412', 'step': 357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:39.132452', 'step': 357, 'epoch': 1} {'type': 'loss', 'content': 0.08777348697185516, 'timestamp': '2025-09-30 22:12:39.135227', 'step': 358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.166124', 'step': 358, 'epoch': 1} {'type': 'loss', 'content': 0.21207410097122192, 'timestamp': '2025-09-30 22:12:39.172047', 'step': 359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.206708', 'step': 359, 'epoch': 1} {'type': 'loss', 'content': 0.2207736372947693, 'timestamp': '2025-09-30 22:12:39.234570', 'step': 360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.265072', 'step': 360, 'epoch': 1} {'type': 'loss', 'content': 0.1520823836326599, 'timestamp': '2025-09-30 22:12:39.269979', 'step': 361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.302963', 'step': 361, 'epoch': 1} {'type': 'loss', 'content': 0.1891416758298874, 'timestamp': '2025-09-30 22:12:39.305714', 'step': 362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:39.340854', 'step': 362, 'epoch': 1} {'type': 'loss', 'content': 0.14305523037910461, 'timestamp': '2025-09-30 22:12:39.343043', 'step': 363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.373599', 'step': 363, 'epoch': 1} {'type': 'loss', 'content': 0.19531883299350739, 'timestamp': '2025-09-30 22:12:39.400249', 'step': 364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:39.431098', 'step': 364, 'epoch': 1} {'type': 'loss', 'content': 0.2741473913192749, 'timestamp': '2025-09-30 22:12:39.436878', 'step': 365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.469696', 'step': 365, 'epoch': 1} {'type': 'loss', 'content': 0.1720801293849945, 'timestamp': '2025-09-30 22:12:39.473095', 'step': 366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:39.504681', 'step': 366, 'epoch': 1} {'type': 'loss', 'content': 0.319174587726593, 'timestamp': '2025-09-30 22:12:39.507486', 'step': 367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.540194', 'step': 367, 'epoch': 1} {'type': 'loss', 'content': 0.14895781874656677, 'timestamp': '2025-09-30 22:12:39.563853', 'step': 368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.599427', 'step': 368, 'epoch': 1} {'type': 'loss', 'content': 0.2550048530101776, 'timestamp': '2025-09-30 22:12:39.601925', 'step': 369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.632796', 'step': 369, 'epoch': 1} {'type': 'loss', 'content': 0.12251503765583038, 'timestamp': '2025-09-30 22:12:39.635957', 'step': 370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.668998', 'step': 370, 'epoch': 1} {'type': 'loss', 'content': 0.21980006992816925, 'timestamp': '2025-09-30 22:12:39.675959', 'step': 371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.711282', 'step': 371, 'epoch': 1} {'type': 'loss', 'content': 0.2658177316188812, 'timestamp': '2025-09-30 22:12:39.736057', 'step': 372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:12:39.768066', 'step': 372, 'epoch': 1} {'type': 'loss', 'content': 0.14116021990776062, 'timestamp': '2025-09-30 22:12:39.773454', 'step': 373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.808228', 'step': 373, 'epoch': 1} {'type': 'loss', 'content': 0.19012333452701569, 'timestamp': '2025-09-30 22:12:39.812335', 'step': 374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.843056', 'step': 374, 'epoch': 1} {'type': 'loss', 'content': 0.1529729962348938, 'timestamp': '2025-09-30 22:12:39.849024', 'step': 375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:39.883601', 'step': 375, 'epoch': 1} {'type': 'loss', 'content': 0.13863234221935272, 'timestamp': '2025-09-30 22:12:39.907371', 'step': 376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.937335', 'step': 376, 'epoch': 1} {'type': 'loss', 'content': 0.2027798593044281, 'timestamp': '2025-09-30 22:12:39.939565', 'step': 377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:39.974882', 'step': 377, 'epoch': 1} {'type': 'loss', 'content': 0.16701386868953705, 'timestamp': '2025-09-30 22:12:39.976994', 'step': 378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:40.007763', 'step': 378, 'epoch': 1} {'type': 'loss', 'content': 0.19729579985141754, 'timestamp': '2025-09-30 22:12:40.010224', 'step': 379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:40.041159', 'step': 379, 'epoch': 1} {'type': 'loss', 'content': 0.2809867858886719, 'timestamp': '2025-09-30 22:12:40.064531', 'step': 380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.094074', 'step': 380, 'epoch': 1} {'type': 'loss', 'content': 0.23976080119609833, 'timestamp': '2025-09-30 22:12:40.095991', 'step': 381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:40.125471', 'step': 381, 'epoch': 1} {'type': 'loss', 'content': 0.31902921199798584, 'timestamp': '2025-09-30 22:12:40.127435', 'step': 382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:40.157097', 'step': 382, 'epoch': 1} {'type': 'loss', 'content': 0.14287422597408295, 'timestamp': '2025-09-30 22:12:40.160925', 'step': 383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.190470', 'step': 383, 'epoch': 1} {'type': 'loss', 'content': 0.23580941557884216, 'timestamp': '2025-09-30 22:12:40.214527', 'step': 384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:40.244881', 'step': 384, 'epoch': 1} {'type': 'loss', 'content': 0.19938893616199493, 'timestamp': '2025-09-30 22:12:40.246945', 'step': 385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:40.276793', 'step': 385, 'epoch': 1} {'type': 'loss', 'content': 0.18076087534427643, 'timestamp': '2025-09-30 22:12:40.278839', 'step': 386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.308383', 'step': 386, 'epoch': 1} {'type': 'loss', 'content': 0.12790396809577942, 'timestamp': '2025-09-30 22:12:40.310416', 'step': 387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:40.341191', 'step': 387, 'epoch': 1} {'type': 'loss', 'content': 0.2306796908378601, 'timestamp': '2025-09-30 22:12:40.369215', 'step': 388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.398928', 'step': 388, 'epoch': 1} {'type': 'loss', 'content': 0.1792302429676056, 'timestamp': '2025-09-30 22:12:40.402314', 'step': 389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:40.436079', 'step': 389, 'epoch': 1} {'type': 'loss', 'content': 0.1969059407711029, 'timestamp': '2025-09-30 22:12:40.438191', 'step': 390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.470516', 'step': 390, 'epoch': 1} {'type': 'loss', 'content': 0.22311915457248688, 'timestamp': '2025-09-30 22:12:40.472547', 'step': 391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:40.507322', 'step': 391, 'epoch': 1} {'type': 'loss', 'content': 0.27157798409461975, 'timestamp': '2025-09-30 22:12:40.531901', 'step': 392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:40.575690', 'step': 392, 'epoch': 1} {'type': 'loss', 'content': 0.17843948304653168, 'timestamp': '2025-09-30 22:12:40.577669', 'step': 393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.614096', 'step': 393, 'epoch': 1} {'type': 'loss', 'content': 0.14876507222652435, 'timestamp': '2025-09-30 22:12:40.618221', 'step': 394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:40.655883', 'step': 394, 'epoch': 1} {'type': 'loss', 'content': 0.29509395360946655, 'timestamp': '2025-09-30 22:12:40.658155', 'step': 395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:40.689782', 'step': 395, 'epoch': 1} {'type': 'loss', 'content': 0.1203492134809494, 'timestamp': '2025-09-30 22:12:40.713016', 'step': 396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:40.745836', 'step': 396, 'epoch': 1} {'type': 'loss', 'content': 0.1582430899143219, 'timestamp': '2025-09-30 22:12:40.750349', 'step': 397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:40.787503', 'step': 397, 'epoch': 1} {'type': 'loss', 'content': 0.25869515538215637, 'timestamp': '2025-09-30 22:12:40.809349', 'step': 398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:40.846334', 'step': 398, 'epoch': 1} {'type': 'loss', 'content': 0.22262664139270782, 'timestamp': '2025-09-30 22:12:40.850341', 'step': 399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:40.893675', 'step': 399, 'epoch': 1} {'type': 'loss', 'content': 0.1499076634645462, 'timestamp': '2025-09-30 22:12:40.918045', 'step': 400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:40.958241', 'step': 400, 'epoch': 1} {'type': 'loss', 'content': 0.2457716166973114, 'timestamp': '2025-09-30 22:12:40.963015', 'step': 401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:41.012907', 'step': 401, 'epoch': 1} {'type': 'loss', 'content': 0.19683599472045898, 'timestamp': '2025-09-30 22:12:41.017227', 'step': 402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:41.067765', 'step': 402, 'epoch': 1} {'type': 'loss', 'content': 0.1697692573070526, 'timestamp': '2025-09-30 22:12:41.072629', 'step': 403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.112160', 'step': 403, 'epoch': 1} {'type': 'loss', 'content': 0.14672093093395233, 'timestamp': '2025-09-30 22:12:41.137443', 'step': 404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.176122', 'step': 404, 'epoch': 1} {'type': 'loss', 'content': 0.19693993031978607, 'timestamp': '2025-09-30 22:12:41.179671', 'step': 405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:41.221957', 'step': 405, 'epoch': 1} {'type': 'loss', 'content': 0.19638247787952423, 'timestamp': '2025-09-30 22:12:41.226872', 'step': 406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.258901', 'step': 406, 'epoch': 1} {'type': 'loss', 'content': 0.14376883208751678, 'timestamp': '2025-09-30 22:12:41.261023', 'step': 407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:41.300190', 'step': 407, 'epoch': 1} {'type': 'loss', 'content': 0.20242761075496674, 'timestamp': '2025-09-30 22:12:41.327802', 'step': 408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:41.380561', 'step': 408, 'epoch': 1} {'type': 'loss', 'content': 0.14542238414287567, 'timestamp': '2025-09-30 22:12:41.384495', 'step': 409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.426165', 'step': 409, 'epoch': 1} {'type': 'loss', 'content': 0.21602413058280945, 'timestamp': '2025-09-30 22:12:41.433900', 'step': 410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.478894', 'step': 410, 'epoch': 1} {'type': 'loss', 'content': 0.24729472398757935, 'timestamp': '2025-09-30 22:12:41.495373', 'step': 411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:41.547266', 'step': 411, 'epoch': 1} {'type': 'loss', 'content': 0.20109565556049347, 'timestamp': '2025-09-30 22:12:41.575012', 'step': 412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.618420', 'step': 412, 'epoch': 1} {'type': 'loss', 'content': 0.19785641133785248, 'timestamp': '2025-09-30 22:12:41.621691', 'step': 413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:41.674175', 'step': 413, 'epoch': 1} {'type': 'loss', 'content': 0.21567970514297485, 'timestamp': '2025-09-30 22:12:41.679473', 'step': 414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:41.713572', 'step': 414, 'epoch': 1} {'type': 'loss', 'content': 0.1692335307598114, 'timestamp': '2025-09-30 22:12:41.720655', 'step': 415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:41.752717', 'step': 415, 'epoch': 1} {'type': 'loss', 'content': 0.1649394929409027, 'timestamp': '2025-09-30 22:12:41.778770', 'step': 416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:41.808367', 'step': 416, 'epoch': 1} {'type': 'loss', 'content': 0.1098475232720375, 'timestamp': '2025-09-30 22:12:41.812744', 'step': 417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:41.842826', 'step': 417, 'epoch': 1} {'type': 'loss', 'content': 0.1923251748085022, 'timestamp': '2025-09-30 22:12:41.845691', 'step': 418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:41.878053', 'step': 418, 'epoch': 1} {'type': 'loss', 'content': 0.11526881903409958, 'timestamp': '2025-09-30 22:12:41.880676', 'step': 419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:41.916648', 'step': 419, 'epoch': 1} {'type': 'loss', 'content': 0.1512768417596817, 'timestamp': '2025-09-30 22:12:41.940657', 'step': 420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:41.970545', 'step': 420, 'epoch': 1} {'type': 'loss', 'content': 0.16923151910305023, 'timestamp': '2025-09-30 22:12:41.973540', 'step': 421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.004733', 'step': 421, 'epoch': 1} {'type': 'loss', 'content': 0.2100025862455368, 'timestamp': '2025-09-30 22:12:42.006898', 'step': 422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.036782', 'step': 422, 'epoch': 1} {'type': 'loss', 'content': 0.2352323979139328, 'timestamp': '2025-09-30 22:12:42.039300', 'step': 423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.068684', 'step': 423, 'epoch': 1} {'type': 'loss', 'content': 0.16532428562641144, 'timestamp': '2025-09-30 22:12:42.092347', 'step': 424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.122412', 'step': 424, 'epoch': 1} {'type': 'loss', 'content': 0.15111249685287476, 'timestamp': '2025-09-30 22:12:42.124581', 'step': 425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.161244', 'step': 425, 'epoch': 1} {'type': 'loss', 'content': 0.20015226304531097, 'timestamp': '2025-09-30 22:12:42.163425', 'step': 426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.193300', 'step': 426, 'epoch': 1} {'type': 'loss', 'content': 0.3414633870124817, 'timestamp': '2025-09-30 22:12:42.196069', 'step': 427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:42.226364', 'step': 427, 'epoch': 1} {'type': 'loss', 'content': 0.22432275116443634, 'timestamp': '2025-09-30 22:12:42.250208', 'step': 428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.279896', 'step': 428, 'epoch': 1} {'type': 'loss', 'content': 0.16725100576877594, 'timestamp': '2025-09-30 22:12:42.281814', 'step': 429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.312921', 'step': 429, 'epoch': 1} {'type': 'loss', 'content': 0.12358289211988449, 'timestamp': '2025-09-30 22:12:42.315664', 'step': 430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:42.351383', 'step': 430, 'epoch': 1} {'type': 'loss', 'content': 0.21085432171821594, 'timestamp': '2025-09-30 22:12:42.354230', 'step': 431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.384370', 'step': 431, 'epoch': 1} {'type': 'loss', 'content': 0.16210338473320007, 'timestamp': '2025-09-30 22:12:42.414956', 'step': 432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.445005', 'step': 432, 'epoch': 1} {'type': 'loss', 'content': 0.1353171318769455, 'timestamp': '2025-09-30 22:12:42.446930', 'step': 433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.477598', 'step': 433, 'epoch': 1} {'type': 'loss', 'content': 0.16940553486347198, 'timestamp': '2025-09-30 22:12:42.480231', 'step': 434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.512800', 'step': 434, 'epoch': 1} {'type': 'loss', 'content': 0.3154766261577606, 'timestamp': '2025-09-30 22:12:42.519626', 'step': 435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.553874', 'step': 435, 'epoch': 1} {'type': 'loss', 'content': 0.11506940424442291, 'timestamp': '2025-09-30 22:12:42.580675', 'step': 436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.616838', 'step': 436, 'epoch': 1} {'type': 'loss', 'content': 0.22916077077388763, 'timestamp': '2025-09-30 22:12:42.621252', 'step': 437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:42.652225', 'step': 437, 'epoch': 1} {'type': 'loss', 'content': 0.23440507054328918, 'timestamp': '2025-09-30 22:12:42.654270', 'step': 438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:42.684837', 'step': 438, 'epoch': 1} {'type': 'loss', 'content': 0.12856492400169373, 'timestamp': '2025-09-30 22:12:42.687139', 'step': 439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.717033', 'step': 439, 'epoch': 1} {'type': 'loss', 'content': 0.2444789707660675, 'timestamp': '2025-09-30 22:12:42.742255', 'step': 440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:42.772163', 'step': 440, 'epoch': 1} {'type': 'loss', 'content': 0.17371368408203125, 'timestamp': '2025-09-30 22:12:42.774259', 'step': 441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.804300', 'step': 441, 'epoch': 1} {'type': 'loss', 'content': 0.18151919543743134, 'timestamp': '2025-09-30 22:12:42.807348', 'step': 442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.837886', 'step': 442, 'epoch': 1} {'type': 'loss', 'content': 0.18040300905704498, 'timestamp': '2025-09-30 22:12:42.841733', 'step': 443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.873759', 'step': 443, 'epoch': 1} {'type': 'loss', 'content': 0.20653197169303894, 'timestamp': '2025-09-30 22:12:42.900472', 'step': 444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.931081', 'step': 444, 'epoch': 1} {'type': 'loss', 'content': 0.2702445983886719, 'timestamp': '2025-09-30 22:12:42.934668', 'step': 445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:42.966725', 'step': 445, 'epoch': 1} {'type': 'loss', 'content': 0.24432018399238586, 'timestamp': '2025-09-30 22:12:42.970998', 'step': 446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:43.002613', 'step': 446, 'epoch': 1} {'type': 'loss', 'content': 0.2525063455104828, 'timestamp': '2025-09-30 22:12:43.006333', 'step': 447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:43.038778', 'step': 447, 'epoch': 1} {'type': 'loss', 'content': 0.18048660457134247, 'timestamp': '2025-09-30 22:12:43.064614', 'step': 448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:43.095005', 'step': 448, 'epoch': 1} {'type': 'loss', 'content': 0.25361496210098267, 'timestamp': '2025-09-30 22:12:43.109006', 'step': 449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:43.142198', 'step': 449, 'epoch': 1} {'type': 'loss', 'content': 0.16758745908737183, 'timestamp': '2025-09-30 22:12:43.146437', 'step': 450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:43.179076', 'step': 450, 'epoch': 1} {'type': 'loss', 'content': 0.17529723048210144, 'timestamp': '2025-09-30 22:12:43.193475', 'step': 451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:43.225460', 'step': 451, 'epoch': 1} {'type': 'loss', 'content': 0.21666847169399261, 'timestamp': '2025-09-30 22:12:43.249887', 'step': 452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:43.282445', 'step': 452, 'epoch': 1} {'type': 'loss', 'content': 0.22526417672634125, 'timestamp': '2025-09-30 22:12:43.285771', 'step': 453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:43.316585', 'step': 453, 'epoch': 1} {'type': 'loss', 'content': 0.19121956825256348, 'timestamp': '2025-09-30 22:12:43.319284', 'step': 454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:43.350270', 'step': 454, 'epoch': 1} {'type': 'loss', 'content': 0.2346850037574768, 'timestamp': '2025-09-30 22:12:43.356675', 'step': 455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:43.392272', 'step': 455, 'epoch': 1} {'type': 'loss', 'content': 0.23846732079982758, 'timestamp': '2025-09-30 22:12:43.416429', 'step': 456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:43.446976', 'step': 456, 'epoch': 1} {'type': 'loss', 'content': 0.23968757688999176, 'timestamp': '2025-09-30 22:12:43.449999', 'step': 457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:43.480270', 'step': 457, 'epoch': 1} {'type': 'loss', 'content': 0.1853356808423996, 'timestamp': '2025-09-30 22:12:43.484056', 'step': 458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:43.514183', 'step': 458, 'epoch': 1} {'type': 'loss', 'content': 0.24225205183029175, 'timestamp': '2025-09-30 22:12:43.516289', 'step': 459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:43.552636', 'step': 459, 'epoch': 1} {'type': 'loss', 'content': 0.23977617919445038, 'timestamp': '2025-09-30 22:12:43.582600', 'step': 460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:43.612689', 'step': 460, 'epoch': 1} {'type': 'loss', 'content': 0.2271365225315094, 'timestamp': '2025-09-30 22:12:43.620963', 'step': 461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:43.657609', 'step': 461, 'epoch': 1} {'type': 'loss', 'content': 0.23305128514766693, 'timestamp': '2025-09-30 22:12:43.664566', 'step': 462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:43.702231', 'step': 462, 'epoch': 1} {'type': 'loss', 'content': 0.17543257772922516, 'timestamp': '2025-09-30 22:12:43.704919', 'step': 463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:43.735939', 'step': 463, 'epoch': 1} {'type': 'loss', 'content': 0.24865780770778656, 'timestamp': '2025-09-30 22:12:43.766063', 'step': 464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:43.797187', 'step': 464, 'epoch': 1} {'type': 'loss', 'content': 0.14394335448741913, 'timestamp': '2025-09-30 22:12:43.804979', 'step': 465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:43.835426', 'step': 465, 'epoch': 1} {'type': 'loss', 'content': 0.2119847536087036, 'timestamp': '2025-09-30 22:12:43.839058', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:12:51.252201', 'step': 466, 'epoch': 1} {'type': 'pplx', 'content': 7699.096027368195, 'timestamp': '2025-09-30 22:12:51.255531', 'step': 466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.284623', 'step': 466, 'epoch': 1} {'type': 'loss', 'content': 0.2204313725233078, 'timestamp': '2025-09-30 22:12:51.288240', 'step': 467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.323014', 'step': 467, 'epoch': 1} {'type': 'loss', 'content': 0.1956435590982437, 'timestamp': '2025-09-30 22:12:51.348561', 'step': 468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.379865', 'step': 468, 'epoch': 1} {'type': 'loss', 'content': 0.20771749317646027, 'timestamp': '2025-09-30 22:12:51.386355', 'step': 469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.417770', 'step': 469, 'epoch': 1} {'type': 'loss', 'content': 0.10440605878829956, 'timestamp': '2025-09-30 22:12:51.419933', 'step': 470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:51.450992', 'step': 470, 'epoch': 1} {'type': 'loss', 'content': 0.23590750992298126, 'timestamp': '2025-09-30 22:12:51.454316', 'step': 471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.490285', 'step': 471, 'epoch': 1} {'type': 'loss', 'content': 0.24000336229801178, 'timestamp': '2025-09-30 22:12:51.515089', 'step': 472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:51.545801', 'step': 472, 'epoch': 1} {'type': 'loss', 'content': 0.17773102223873138, 'timestamp': '2025-09-30 22:12:51.549282', 'step': 473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:51.583808', 'step': 473, 'epoch': 1} {'type': 'loss', 'content': 0.09809073060750961, 'timestamp': '2025-09-30 22:12:51.588712', 'step': 474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:51.624643', 'step': 474, 'epoch': 1} {'type': 'loss', 'content': 0.2890790104866028, 'timestamp': '2025-09-30 22:12:51.632361', 'step': 475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.663163', 'step': 475, 'epoch': 1} {'type': 'loss', 'content': 0.1568359136581421, 'timestamp': '2025-09-30 22:12:51.688287', 'step': 476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:51.718462', 'step': 476, 'epoch': 1} {'type': 'loss', 'content': 0.15606801211833954, 'timestamp': '2025-09-30 22:12:51.721519', 'step': 477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:51.752283', 'step': 477, 'epoch': 1} {'type': 'loss', 'content': 0.1254327893257141, 'timestamp': '2025-09-30 22:12:51.755062', 'step': 478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:51.792419', 'step': 478, 'epoch': 1} {'type': 'loss', 'content': 0.16553863883018494, 'timestamp': '2025-09-30 22:12:51.796568', 'step': 479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:51.827327', 'step': 479, 'epoch': 1} {'type': 'loss', 'content': 0.3128201961517334, 'timestamp': '2025-09-30 22:12:51.851497', 'step': 480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:12:51.881419', 'step': 480, 'epoch': 1} {'type': 'loss', 'content': 0.20916490256786346, 'timestamp': '2025-09-30 22:12:51.886336', 'step': 481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:51.917727', 'step': 481, 'epoch': 1} {'type': 'loss', 'content': 0.175765261054039, 'timestamp': '2025-09-30 22:12:51.921075', 'step': 482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:51.951158', 'step': 482, 'epoch': 1} {'type': 'loss', 'content': 0.1522664725780487, 'timestamp': '2025-09-30 22:12:51.955934', 'step': 483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:51.988622', 'step': 483, 'epoch': 1} {'type': 'loss', 'content': 0.25321489572525024, 'timestamp': '2025-09-30 22:12:52.017501', 'step': 484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:52.049506', 'step': 484, 'epoch': 1} {'type': 'loss', 'content': 0.17242322862148285, 'timestamp': '2025-09-30 22:12:52.052121', 'step': 485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.083262', 'step': 485, 'epoch': 1} {'type': 'loss', 'content': 0.1623527556657791, 'timestamp': '2025-09-30 22:12:52.086400', 'step': 486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:52.127927', 'step': 486, 'epoch': 1} {'type': 'loss', 'content': 0.11540372669696808, 'timestamp': '2025-09-30 22:12:52.132285', 'step': 487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.166466', 'step': 487, 'epoch': 1} {'type': 'loss', 'content': 0.260820597410202, 'timestamp': '2025-09-30 22:12:52.191775', 'step': 488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:52.222874', 'step': 488, 'epoch': 1} {'type': 'loss', 'content': 0.27466192841529846, 'timestamp': '2025-09-30 22:12:52.226840', 'step': 489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.257709', 'step': 489, 'epoch': 1} {'type': 'loss', 'content': 0.15955513715744019, 'timestamp': '2025-09-30 22:12:52.260336', 'step': 490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:52.294194', 'step': 490, 'epoch': 1} {'type': 'loss', 'content': 0.2083207368850708, 'timestamp': '2025-09-30 22:12:52.298259', 'step': 491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.333817', 'step': 491, 'epoch': 1} {'type': 'loss', 'content': 0.2101074606180191, 'timestamp': '2025-09-30 22:12:52.363211', 'step': 492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.395101', 'step': 492, 'epoch': 1} {'type': 'loss', 'content': 0.12347009778022766, 'timestamp': '2025-09-30 22:12:52.398705', 'step': 493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:52.434223', 'step': 493, 'epoch': 1} {'type': 'loss', 'content': 0.23326662182807922, 'timestamp': '2025-09-30 22:12:52.443621', 'step': 494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.481822', 'step': 494, 'epoch': 1} {'type': 'loss', 'content': 0.23541072010993958, 'timestamp': '2025-09-30 22:12:52.484619', 'step': 495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:52.516938', 'step': 495, 'epoch': 1} {'type': 'loss', 'content': 0.25376924872398376, 'timestamp': '2025-09-30 22:12:52.547183', 'step': 496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:12:52.577885', 'step': 496, 'epoch': 1} {'type': 'loss', 'content': 0.27184632420539856, 'timestamp': '2025-09-30 22:12:52.580126', 'step': 497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:52.610794', 'step': 497, 'epoch': 1} {'type': 'loss', 'content': 0.20958389341831207, 'timestamp': '2025-09-30 22:12:52.613261', 'step': 498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:52.643691', 'step': 498, 'epoch': 1} {'type': 'loss', 'content': 0.2091521918773651, 'timestamp': '2025-09-30 22:12:52.647487', 'step': 499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:52.678834', 'step': 499, 'epoch': 1} {'type': 'loss', 'content': 0.14614976942539215, 'timestamp': '2025-09-30 22:12:52.708033', 'step': 500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 500', 'timestamp': '2025-09-30 22:12:57.139511', 'step': 500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:57.170139', 'step': 500, 'epoch': 1} {'type': 'loss', 'content': 0.08959335833787918, 'timestamp': '2025-09-30 22:12:57.172865', 'step': 501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.204287', 'step': 501, 'epoch': 1} {'type': 'loss', 'content': 0.2284584641456604, 'timestamp': '2025-09-30 22:12:57.212112', 'step': 502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:57.242531', 'step': 502, 'epoch': 1} {'type': 'loss', 'content': 0.14405730366706848, 'timestamp': '2025-09-30 22:12:57.245232', 'step': 503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:57.274878', 'step': 503, 'epoch': 1} {'type': 'loss', 'content': 0.1449897736310959, 'timestamp': '2025-09-30 22:12:57.298999', 'step': 504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:57.329327', 'step': 504, 'epoch': 1} {'type': 'loss', 'content': 0.1671493500471115, 'timestamp': '2025-09-30 22:12:57.332462', 'step': 505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:57.366365', 'step': 505, 'epoch': 1} {'type': 'loss', 'content': 0.19446389377117157, 'timestamp': '2025-09-30 22:12:57.374757', 'step': 506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.405050', 'step': 506, 'epoch': 1} {'type': 'loss', 'content': 0.26045164465904236, 'timestamp': '2025-09-30 22:12:57.407954', 'step': 507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.443907', 'step': 507, 'epoch': 1} {'type': 'loss', 'content': 0.1282079964876175, 'timestamp': '2025-09-30 22:12:57.473985', 'step': 508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:57.505159', 'step': 508, 'epoch': 1} {'type': 'loss', 'content': 0.17473597824573517, 'timestamp': '2025-09-30 22:12:57.512405', 'step': 509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.542571', 'step': 509, 'epoch': 1} {'type': 'loss', 'content': 0.15454551577568054, 'timestamp': '2025-09-30 22:12:57.545479', 'step': 510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:57.575731', 'step': 510, 'epoch': 1} {'type': 'loss', 'content': 0.11857569962739944, 'timestamp': '2025-09-30 22:12:57.578861', 'step': 511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.611996', 'step': 511, 'epoch': 1} {'type': 'loss', 'content': 0.14201049506664276, 'timestamp': '2025-09-30 22:12:57.636796', 'step': 512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.671082', 'step': 512, 'epoch': 1} {'type': 'loss', 'content': 0.28117209672927856, 'timestamp': '2025-09-30 22:12:57.678239', 'step': 513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:57.709195', 'step': 513, 'epoch': 1} {'type': 'loss', 'content': 0.16937854886054993, 'timestamp': '2025-09-30 22:12:57.716715', 'step': 514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:57.750257', 'step': 514, 'epoch': 1} {'type': 'loss', 'content': 0.21208861470222473, 'timestamp': '2025-09-30 22:12:57.758261', 'step': 515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:57.789222', 'step': 515, 'epoch': 1} {'type': 'loss', 'content': 0.20310534536838531, 'timestamp': '2025-09-30 22:12:57.813854', 'step': 516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.855696', 'step': 516, 'epoch': 1} {'type': 'loss', 'content': 0.21742184460163116, 'timestamp': '2025-09-30 22:12:57.862660', 'step': 517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:57.900935', 'step': 517, 'epoch': 1} {'type': 'loss', 'content': 0.1366376429796219, 'timestamp': '2025-09-30 22:12:57.904235', 'step': 518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:57.939716', 'step': 518, 'epoch': 1} {'type': 'loss', 'content': 0.212642639875412, 'timestamp': '2025-09-30 22:12:57.947569', 'step': 519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:57.979132', 'step': 519, 'epoch': 1} {'type': 'loss', 'content': 0.1682087481021881, 'timestamp': '2025-09-30 22:12:58.006219', 'step': 520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:12:58.036214', 'step': 520, 'epoch': 1} {'type': 'loss', 'content': 0.13779732584953308, 'timestamp': '2025-09-30 22:12:58.038817', 'step': 521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.074605', 'step': 521, 'epoch': 1} {'type': 'loss', 'content': 0.3008333742618561, 'timestamp': '2025-09-30 22:12:58.078041', 'step': 522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.107434', 'step': 522, 'epoch': 1} {'type': 'loss', 'content': 0.19692370295524597, 'timestamp': '2025-09-30 22:12:58.116471', 'step': 523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:58.148830', 'step': 523, 'epoch': 1} {'type': 'loss', 'content': 0.18999657034873962, 'timestamp': '2025-09-30 22:12:58.172953', 'step': 524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:58.202816', 'step': 524, 'epoch': 1} {'type': 'loss', 'content': 0.09978242963552475, 'timestamp': '2025-09-30 22:12:58.208795', 'step': 525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:58.244581', 'step': 525, 'epoch': 1} {'type': 'loss', 'content': 0.2687797546386719, 'timestamp': '2025-09-30 22:12:58.248116', 'step': 526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:58.280479', 'step': 526, 'epoch': 1} {'type': 'loss', 'content': 0.17937643826007843, 'timestamp': '2025-09-30 22:12:58.286784', 'step': 527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:12:58.317029', 'step': 527, 'epoch': 1} {'type': 'loss', 'content': 0.17172162234783173, 'timestamp': '2025-09-30 22:12:58.343101', 'step': 528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:58.381108', 'step': 528, 'epoch': 1} {'type': 'loss', 'content': 0.15236543118953705, 'timestamp': '2025-09-30 22:12:58.384221', 'step': 529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.414506', 'step': 529, 'epoch': 1} {'type': 'loss', 'content': 0.20677649974822998, 'timestamp': '2025-09-30 22:12:58.418233', 'step': 530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:58.448846', 'step': 530, 'epoch': 1} {'type': 'loss', 'content': 0.14510419964790344, 'timestamp': '2025-09-30 22:12:58.452493', 'step': 531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:58.489767', 'step': 531, 'epoch': 1} {'type': 'loss', 'content': 0.14491185545921326, 'timestamp': '2025-09-30 22:12:58.515659', 'step': 532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:12:58.550489', 'step': 532, 'epoch': 1} {'type': 'loss', 'content': 0.15349094569683075, 'timestamp': '2025-09-30 22:12:58.557905', 'step': 533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:12:58.600092', 'step': 533, 'epoch': 1} {'type': 'loss', 'content': 0.22267185151576996, 'timestamp': '2025-09-30 22:12:58.604781', 'step': 534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.634767', 'step': 534, 'epoch': 1} {'type': 'loss', 'content': 0.37842217087745667, 'timestamp': '2025-09-30 22:12:58.637493', 'step': 535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:58.671619', 'step': 535, 'epoch': 1} {'type': 'loss', 'content': 0.1742851585149765, 'timestamp': '2025-09-30 22:12:58.696584', 'step': 536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.727418', 'step': 536, 'epoch': 1} {'type': 'loss', 'content': 0.1262207180261612, 'timestamp': '2025-09-30 22:12:58.730831', 'step': 537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.761985', 'step': 537, 'epoch': 1} {'type': 'loss', 'content': 0.10187752544879913, 'timestamp': '2025-09-30 22:12:58.764680', 'step': 538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.801729', 'step': 538, 'epoch': 1} {'type': 'loss', 'content': 0.2233732044696808, 'timestamp': '2025-09-30 22:12:58.805887', 'step': 539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:58.836643', 'step': 539, 'epoch': 1} {'type': 'loss', 'content': 0.20572252571582794, 'timestamp': '2025-09-30 22:12:58.860766', 'step': 540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.891261', 'step': 540, 'epoch': 1} {'type': 'loss', 'content': 0.19419348239898682, 'timestamp': '2025-09-30 22:12:58.893931', 'step': 541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.927885', 'step': 541, 'epoch': 1} {'type': 'loss', 'content': 0.1822165548801422, 'timestamp': '2025-09-30 22:12:58.930841', 'step': 542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:58.965261', 'step': 542, 'epoch': 1} {'type': 'loss', 'content': 0.13547879457473755, 'timestamp': '2025-09-30 22:12:58.967982', 'step': 543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:59.005161', 'step': 543, 'epoch': 1} {'type': 'loss', 'content': 0.23577478528022766, 'timestamp': '2025-09-30 22:12:59.030826', 'step': 544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.061331', 'step': 544, 'epoch': 1} {'type': 'loss', 'content': 0.18234297633171082, 'timestamp': '2025-09-30 22:12:59.063640', 'step': 545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.093687', 'step': 545, 'epoch': 1} {'type': 'loss', 'content': 0.22070835530757904, 'timestamp': '2025-09-30 22:12:59.096857', 'step': 546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.127743', 'step': 546, 'epoch': 1} {'type': 'loss', 'content': 0.1705092340707779, 'timestamp': '2025-09-30 22:12:59.130888', 'step': 547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:59.161934', 'step': 547, 'epoch': 1} {'type': 'loss', 'content': 0.26912450790405273, 'timestamp': '2025-09-30 22:12:59.186083', 'step': 548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.217803', 'step': 548, 'epoch': 1} {'type': 'loss', 'content': 0.29736265540122986, 'timestamp': '2025-09-30 22:12:59.220824', 'step': 549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.257118', 'step': 549, 'epoch': 1} {'type': 'loss', 'content': 0.10414490848779678, 'timestamp': '2025-09-30 22:12:59.262045', 'step': 550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.292875', 'step': 550, 'epoch': 1} {'type': 'loss', 'content': 0.19650481641292572, 'timestamp': '2025-09-30 22:12:59.296502', 'step': 551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.326145', 'step': 551, 'epoch': 1} {'type': 'loss', 'content': 0.22514910995960236, 'timestamp': '2025-09-30 22:12:59.352381', 'step': 552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.381976', 'step': 552, 'epoch': 1} {'type': 'loss', 'content': 0.18585118651390076, 'timestamp': '2025-09-30 22:12:59.384232', 'step': 553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.413901', 'step': 553, 'epoch': 1} {'type': 'loss', 'content': 0.20672395825386047, 'timestamp': '2025-09-30 22:12:59.416667', 'step': 554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.453573', 'step': 554, 'epoch': 1} {'type': 'loss', 'content': 0.15504246950149536, 'timestamp': '2025-09-30 22:12:59.458241', 'step': 555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.489718', 'step': 555, 'epoch': 1} {'type': 'loss', 'content': 0.14912299811840057, 'timestamp': '2025-09-30 22:12:59.516378', 'step': 556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.548287', 'step': 556, 'epoch': 1} {'type': 'loss', 'content': 0.14081403613090515, 'timestamp': '2025-09-30 22:12:59.551873', 'step': 557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:59.585362', 'step': 557, 'epoch': 1} {'type': 'loss', 'content': 0.10068654268980026, 'timestamp': '2025-09-30 22:12:59.589514', 'step': 558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.625667', 'step': 558, 'epoch': 1} {'type': 'loss', 'content': 0.18048442900180817, 'timestamp': '2025-09-30 22:12:59.630888', 'step': 559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:12:59.664744', 'step': 559, 'epoch': 1} {'type': 'loss', 'content': 0.284214586019516, 'timestamp': '2025-09-30 22:12:59.690463', 'step': 560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.729649', 'step': 560, 'epoch': 1} {'type': 'loss', 'content': 0.1738305240869522, 'timestamp': '2025-09-30 22:12:59.732842', 'step': 561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.767227', 'step': 561, 'epoch': 1} {'type': 'loss', 'content': 0.12283861637115479, 'timestamp': '2025-09-30 22:12:59.777357', 'step': 562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:12:59.809563', 'step': 562, 'epoch': 1} {'type': 'loss', 'content': 0.18132926523685455, 'timestamp': '2025-09-30 22:12:59.812439', 'step': 563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.842716', 'step': 563, 'epoch': 1} {'type': 'loss', 'content': 0.21919132769107819, 'timestamp': '2025-09-30 22:12:59.871093', 'step': 564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:12:59.905663', 'step': 564, 'epoch': 1} {'type': 'loss', 'content': 0.282444566488266, 'timestamp': '2025-09-30 22:12:59.908739', 'step': 565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:12:59.946168', 'step': 565, 'epoch': 1} {'type': 'loss', 'content': 0.22709685564041138, 'timestamp': '2025-09-30 22:12:59.948678', 'step': 566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:12:59.986564', 'step': 566, 'epoch': 1} {'type': 'loss', 'content': 0.3055093288421631, 'timestamp': '2025-09-30 22:12:59.989247', 'step': 567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:00.023281', 'step': 567, 'epoch': 1} {'type': 'loss', 'content': 0.16715949773788452, 'timestamp': '2025-09-30 22:13:00.047754', 'step': 568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.078919', 'step': 568, 'epoch': 1} {'type': 'loss', 'content': 0.24739831686019897, 'timestamp': '2025-09-30 22:13:00.081120', 'step': 569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.113930', 'step': 569, 'epoch': 1} {'type': 'loss', 'content': 0.1506607085466385, 'timestamp': '2025-09-30 22:13:00.119616', 'step': 570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:00.150115', 'step': 570, 'epoch': 1} {'type': 'loss', 'content': 0.18750320374965668, 'timestamp': '2025-09-30 22:13:00.152474', 'step': 571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.184618', 'step': 571, 'epoch': 1} {'type': 'loss', 'content': 0.17836302518844604, 'timestamp': '2025-09-30 22:13:00.210193', 'step': 572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.240050', 'step': 572, 'epoch': 1} {'type': 'loss', 'content': 0.13835257291793823, 'timestamp': '2025-09-30 22:13:00.242356', 'step': 573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:00.273168', 'step': 573, 'epoch': 1} {'type': 'loss', 'content': 0.3257352411746979, 'timestamp': '2025-09-30 22:13:00.275596', 'step': 574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.306260', 'step': 574, 'epoch': 1} {'type': 'loss', 'content': 0.1375042200088501, 'timestamp': '2025-09-30 22:13:00.316626', 'step': 575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:00.355726', 'step': 575, 'epoch': 1} {'type': 'loss', 'content': 0.1822260320186615, 'timestamp': '2025-09-30 22:13:00.381051', 'step': 576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:00.418510', 'step': 576, 'epoch': 1} {'type': 'loss', 'content': 0.08991015702486038, 'timestamp': '2025-09-30 22:13:00.421990', 'step': 577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:00.453499', 'step': 577, 'epoch': 1} {'type': 'loss', 'content': 0.24400219321250916, 'timestamp': '2025-09-30 22:13:00.461867', 'step': 578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.499544', 'step': 578, 'epoch': 1} {'type': 'loss', 'content': 0.2771553099155426, 'timestamp': '2025-09-30 22:13:00.502328', 'step': 579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:00.533333', 'step': 579, 'epoch': 1} {'type': 'loss', 'content': 0.17243711650371552, 'timestamp': '2025-09-30 22:13:00.557550', 'step': 580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:00.589091', 'step': 580, 'epoch': 1} {'type': 'loss', 'content': 0.15162794291973114, 'timestamp': '2025-09-30 22:13:00.597178', 'step': 581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:00.630667', 'step': 581, 'epoch': 1} {'type': 'loss', 'content': 0.11221498996019363, 'timestamp': '2025-09-30 22:13:00.637811', 'step': 582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.673836', 'step': 582, 'epoch': 1} {'type': 'loss', 'content': 0.1889074593782425, 'timestamp': '2025-09-30 22:13:00.676928', 'step': 583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.712111', 'step': 583, 'epoch': 1} {'type': 'loss', 'content': 0.1988602578639984, 'timestamp': '2025-09-30 22:13:00.737365', 'step': 584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:00.770719', 'step': 584, 'epoch': 1} {'type': 'loss', 'content': 0.16209056973457336, 'timestamp': '2025-09-30 22:13:00.779958', 'step': 585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:13:00.821071', 'step': 585, 'epoch': 1} {'type': 'loss', 'content': 0.12092352658510208, 'timestamp': '2025-09-30 22:13:00.825528', 'step': 586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:00.856819', 'step': 586, 'epoch': 1} {'type': 'loss', 'content': 0.15275172889232635, 'timestamp': '2025-09-30 22:13:00.859695', 'step': 587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:00.893955', 'step': 587, 'epoch': 1} {'type': 'loss', 'content': 0.1801944375038147, 'timestamp': '2025-09-30 22:13:00.921720', 'step': 588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:00.954530', 'step': 588, 'epoch': 1} {'type': 'loss', 'content': 0.2599727213382721, 'timestamp': '2025-09-30 22:13:00.957536', 'step': 589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:00.989456', 'step': 589, 'epoch': 1} {'type': 'loss', 'content': 0.19012361764907837, 'timestamp': '2025-09-30 22:13:00.993625', 'step': 590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:01.028054', 'step': 590, 'epoch': 1} {'type': 'loss', 'content': 0.18332771956920624, 'timestamp': '2025-09-30 22:13:01.030915', 'step': 591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:13:01.065655', 'step': 591, 'epoch': 1} {'type': 'loss', 'content': 0.18251410126686096, 'timestamp': '2025-09-30 22:13:01.093873', 'step': 592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:01.129537', 'step': 592, 'epoch': 1} {'type': 'loss', 'content': 0.21168291568756104, 'timestamp': '2025-09-30 22:13:01.132162', 'step': 593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:01.163163', 'step': 593, 'epoch': 1} {'type': 'loss', 'content': 0.1392967849969864, 'timestamp': '2025-09-30 22:13:01.168665', 'step': 594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.199016', 'step': 594, 'epoch': 1} {'type': 'loss', 'content': 0.157653346657753, 'timestamp': '2025-09-30 22:13:01.202117', 'step': 595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.239609', 'step': 595, 'epoch': 1} {'type': 'loss', 'content': 0.3541877865791321, 'timestamp': '2025-09-30 22:13:01.263882', 'step': 596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:01.302191', 'step': 596, 'epoch': 1} {'type': 'loss', 'content': 0.19188882410526276, 'timestamp': '2025-09-30 22:13:01.304953', 'step': 597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:01.335024', 'step': 597, 'epoch': 1} {'type': 'loss', 'content': 0.21649575233459473, 'timestamp': '2025-09-30 22:13:01.342293', 'step': 598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:01.380373', 'step': 598, 'epoch': 1} {'type': 'loss', 'content': 0.21149536967277527, 'timestamp': '2025-09-30 22:13:01.382723', 'step': 599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:01.412887', 'step': 599, 'epoch': 1} {'type': 'loss', 'content': 0.2139456570148468, 'timestamp': '2025-09-30 22:13:01.439399', 'step': 600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.481171', 'step': 600, 'epoch': 1} {'type': 'loss', 'content': 0.15657302737236023, 'timestamp': '2025-09-30 22:13:01.485307', 'step': 601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:01.519128', 'step': 601, 'epoch': 1} {'type': 'loss', 'content': 0.2896530330181122, 'timestamp': '2025-09-30 22:13:01.521521', 'step': 602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:01.551339', 'step': 602, 'epoch': 1} {'type': 'loss', 'content': 0.24622777104377747, 'timestamp': '2025-09-30 22:13:01.553756', 'step': 603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.584111', 'step': 603, 'epoch': 1} {'type': 'loss', 'content': 0.19209666550159454, 'timestamp': '2025-09-30 22:13:01.611303', 'step': 604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:01.643087', 'step': 604, 'epoch': 1} {'type': 'loss', 'content': 0.15042123198509216, 'timestamp': '2025-09-30 22:13:01.645634', 'step': 605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.677155', 'step': 605, 'epoch': 1} {'type': 'loss', 'content': 0.15620288252830505, 'timestamp': '2025-09-30 22:13:01.686056', 'step': 606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.715749', 'step': 606, 'epoch': 1} {'type': 'loss', 'content': 0.0919727236032486, 'timestamp': '2025-09-30 22:13:01.720326', 'step': 607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:01.751955', 'step': 607, 'epoch': 1} {'type': 'loss', 'content': 0.15949618816375732, 'timestamp': '2025-09-30 22:13:01.776467', 'step': 608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:01.807963', 'step': 608, 'epoch': 1} {'type': 'loss', 'content': 0.09951207786798477, 'timestamp': '2025-09-30 22:13:01.810592', 'step': 609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:01.840069', 'step': 609, 'epoch': 1} {'type': 'loss', 'content': 0.21299952268600464, 'timestamp': '2025-09-30 22:13:01.845363', 'step': 610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:01.877111', 'step': 610, 'epoch': 1} {'type': 'loss', 'content': 0.23091459274291992, 'timestamp': '2025-09-30 22:13:01.882169', 'step': 611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:01.919284', 'step': 611, 'epoch': 1} {'type': 'loss', 'content': 0.22764773666858673, 'timestamp': '2025-09-30 22:13:01.944858', 'step': 612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:01.977512', 'step': 612, 'epoch': 1} {'type': 'loss', 'content': 0.146085724234581, 'timestamp': '2025-09-30 22:13:01.988044', 'step': 613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.022627', 'step': 613, 'epoch': 1} {'type': 'loss', 'content': 0.20240215957164764, 'timestamp': '2025-09-30 22:13:02.026284', 'step': 614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.061956', 'step': 614, 'epoch': 1} {'type': 'loss', 'content': 0.22745075821876526, 'timestamp': '2025-09-30 22:13:02.064451', 'step': 615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.095878', 'step': 615, 'epoch': 1} {'type': 'loss', 'content': 0.17233441770076752, 'timestamp': '2025-09-30 22:13:02.120737', 'step': 616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.152408', 'step': 616, 'epoch': 1} {'type': 'loss', 'content': 0.22132867574691772, 'timestamp': '2025-09-30 22:13:02.155492', 'step': 617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:02.192461', 'step': 617, 'epoch': 1} {'type': 'loss', 'content': 0.2532644271850586, 'timestamp': '2025-09-30 22:13:02.202344', 'step': 618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.236038', 'step': 618, 'epoch': 1} {'type': 'loss', 'content': 0.14161653816699982, 'timestamp': '2025-09-30 22:13:02.239751', 'step': 619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:02.270731', 'step': 619, 'epoch': 1} {'type': 'loss', 'content': 0.2644275426864624, 'timestamp': '2025-09-30 22:13:02.295458', 'step': 620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.328510', 'step': 620, 'epoch': 1} {'type': 'loss', 'content': 0.2236923724412918, 'timestamp': '2025-09-30 22:13:02.342175', 'step': 621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:02.380082', 'step': 621, 'epoch': 1} {'type': 'loss', 'content': 0.25176435708999634, 'timestamp': '2025-09-30 22:13:02.388165', 'step': 622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.423909', 'step': 622, 'epoch': 1} {'type': 'loss', 'content': 0.2667928636074066, 'timestamp': '2025-09-30 22:13:02.432275', 'step': 623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:02.464582', 'step': 623, 'epoch': 1} {'type': 'loss', 'content': 0.20263956487178802, 'timestamp': '2025-09-30 22:13:02.489382', 'step': 624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.521335', 'step': 624, 'epoch': 1} {'type': 'loss', 'content': 0.15271595120429993, 'timestamp': '2025-09-30 22:13:02.524058', 'step': 625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:02.565051', 'step': 625, 'epoch': 1} {'type': 'loss', 'content': 0.20904532074928284, 'timestamp': '2025-09-30 22:13:02.567648', 'step': 626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.600820', 'step': 626, 'epoch': 1} {'type': 'loss', 'content': 0.28021296858787537, 'timestamp': '2025-09-30 22:13:02.607269', 'step': 627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:02.637579', 'step': 627, 'epoch': 1} {'type': 'loss', 'content': 0.1512710154056549, 'timestamp': '2025-09-30 22:13:02.663126', 'step': 628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.693209', 'step': 628, 'epoch': 1} {'type': 'loss', 'content': 0.24776622653007507, 'timestamp': '2025-09-30 22:13:02.695747', 'step': 629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.726835', 'step': 629, 'epoch': 1} {'type': 'loss', 'content': 0.15367810428142548, 'timestamp': '2025-09-30 22:13:02.729369', 'step': 630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.758882', 'step': 630, 'epoch': 1} {'type': 'loss', 'content': 0.16936345398426056, 'timestamp': '2025-09-30 22:13:02.767680', 'step': 631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:02.797649', 'step': 631, 'epoch': 1} {'type': 'loss', 'content': 0.19607722759246826, 'timestamp': '2025-09-30 22:13:02.822027', 'step': 632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.854628', 'step': 632, 'epoch': 1} {'type': 'loss', 'content': 0.1456860899925232, 'timestamp': '2025-09-30 22:13:02.857415', 'step': 633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:02.888685', 'step': 633, 'epoch': 1} {'type': 'loss', 'content': 0.21494312584400177, 'timestamp': '2025-09-30 22:13:02.893253', 'step': 634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:02.926609', 'step': 634, 'epoch': 1} {'type': 'loss', 'content': 0.14152804017066956, 'timestamp': '2025-09-30 22:13:02.931641', 'step': 635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:02.975081', 'step': 635, 'epoch': 1} {'type': 'loss', 'content': 0.19230294227600098, 'timestamp': '2025-09-30 22:13:03.000526', 'step': 636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:03.030956', 'step': 636, 'epoch': 1} {'type': 'loss', 'content': 0.1805216372013092, 'timestamp': '2025-09-30 22:13:03.038882', 'step': 637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:03.075890', 'step': 637, 'epoch': 1} {'type': 'loss', 'content': 0.23156045377254486, 'timestamp': '2025-09-30 22:13:03.079440', 'step': 638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.136739', 'step': 638, 'epoch': 1} {'type': 'loss', 'content': 0.19029752910137177, 'timestamp': '2025-09-30 22:13:03.141928', 'step': 639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:03.176103', 'step': 639, 'epoch': 1} {'type': 'loss', 'content': 0.16035863757133484, 'timestamp': '2025-09-30 22:13:03.200329', 'step': 640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.230580', 'step': 640, 'epoch': 1} {'type': 'loss', 'content': 0.189298614859581, 'timestamp': '2025-09-30 22:13:03.233787', 'step': 641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.264422', 'step': 641, 'epoch': 1} {'type': 'loss', 'content': 0.18197999894618988, 'timestamp': '2025-09-30 22:13:03.278589', 'step': 642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.312098', 'step': 642, 'epoch': 1} {'type': 'loss', 'content': 0.23379524052143097, 'timestamp': '2025-09-30 22:13:03.314349', 'step': 643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.349109', 'step': 643, 'epoch': 1} {'type': 'loss', 'content': 0.1796715259552002, 'timestamp': '2025-09-30 22:13:03.373614', 'step': 644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:03.405031', 'step': 644, 'epoch': 1} {'type': 'loss', 'content': 0.21140901744365692, 'timestamp': '2025-09-30 22:13:03.407340', 'step': 645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.443617', 'step': 645, 'epoch': 1} {'type': 'loss', 'content': 0.1855195164680481, 'timestamp': '2025-09-30 22:13:03.446460', 'step': 646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:03.480641', 'step': 646, 'epoch': 1} {'type': 'loss', 'content': 0.21242749691009521, 'timestamp': '2025-09-30 22:13:03.484248', 'step': 647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:03.515135', 'step': 647, 'epoch': 1} {'type': 'loss', 'content': 0.24990524351596832, 'timestamp': '2025-09-30 22:13:03.544390', 'step': 648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.578363', 'step': 648, 'epoch': 1} {'type': 'loss', 'content': 0.19896046817302704, 'timestamp': '2025-09-30 22:13:03.582288', 'step': 649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:03.620284', 'step': 649, 'epoch': 1} {'type': 'loss', 'content': 0.16430336236953735, 'timestamp': '2025-09-30 22:13:03.623261', 'step': 650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:03.654910', 'step': 650, 'epoch': 1} {'type': 'loss', 'content': 0.31673333048820496, 'timestamp': '2025-09-30 22:13:03.657949', 'step': 651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:03.689416', 'step': 651, 'epoch': 1} {'type': 'loss', 'content': 0.09918239712715149, 'timestamp': '2025-09-30 22:13:03.723852', 'step': 652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:03.761459', 'step': 652, 'epoch': 1} {'type': 'loss', 'content': 0.1955193281173706, 'timestamp': '2025-09-30 22:13:03.765511', 'step': 653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.797215', 'step': 653, 'epoch': 1} {'type': 'loss', 'content': 0.20500318706035614, 'timestamp': '2025-09-30 22:13:03.800500', 'step': 654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:03.837474', 'step': 654, 'epoch': 1} {'type': 'loss', 'content': 0.21399301290512085, 'timestamp': '2025-09-30 22:13:03.840689', 'step': 655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.871530', 'step': 655, 'epoch': 1} {'type': 'loss', 'content': 0.10556919872760773, 'timestamp': '2025-09-30 22:13:03.896190', 'step': 656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:03.930223', 'step': 656, 'epoch': 1} {'type': 'loss', 'content': 0.14676925539970398, 'timestamp': '2025-09-30 22:13:03.932926', 'step': 657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:03.963334', 'step': 657, 'epoch': 1} {'type': 'loss', 'content': 0.1539665311574936, 'timestamp': '2025-09-30 22:13:03.966221', 'step': 658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:03.995823', 'step': 658, 'epoch': 1} {'type': 'loss', 'content': 0.2023889422416687, 'timestamp': '2025-09-30 22:13:04.002403', 'step': 659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.033585', 'step': 659, 'epoch': 1} {'type': 'loss', 'content': 0.15067040920257568, 'timestamp': '2025-09-30 22:13:04.058988', 'step': 660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.095144', 'step': 660, 'epoch': 1} {'type': 'loss', 'content': 0.18126335740089417, 'timestamp': '2025-09-30 22:13:04.097327', 'step': 661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.130950', 'step': 661, 'epoch': 1} {'type': 'loss', 'content': 0.24168066680431366, 'timestamp': '2025-09-30 22:13:04.134788', 'step': 662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.164717', 'step': 662, 'epoch': 1} {'type': 'loss', 'content': 0.2888621985912323, 'timestamp': '2025-09-30 22:13:04.167498', 'step': 663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.196491', 'step': 663, 'epoch': 1} {'type': 'loss', 'content': 0.22166748344898224, 'timestamp': '2025-09-30 22:13:04.220783', 'step': 664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.250814', 'step': 664, 'epoch': 1} {'type': 'loss', 'content': 0.231820747256279, 'timestamp': '2025-09-30 22:13:04.260430', 'step': 665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:04.295146', 'step': 665, 'epoch': 1} {'type': 'loss', 'content': 0.13536007702350616, 'timestamp': '2025-09-30 22:13:04.297062', 'step': 666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:04.332886', 'step': 666, 'epoch': 1} {'type': 'loss', 'content': 0.22970843315124512, 'timestamp': '2025-09-30 22:13:04.335326', 'step': 667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:04.365325', 'step': 667, 'epoch': 1} {'type': 'loss', 'content': 0.19866345822811127, 'timestamp': '2025-09-30 22:13:04.390007', 'step': 668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:04.420615', 'step': 668, 'epoch': 1} {'type': 'loss', 'content': 0.3941478431224823, 'timestamp': '2025-09-30 22:13:04.423563', 'step': 669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:04.457169', 'step': 669, 'epoch': 1} {'type': 'loss', 'content': 0.24790573120117188, 'timestamp': '2025-09-30 22:13:04.461668', 'step': 670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.492749', 'step': 670, 'epoch': 1} {'type': 'loss', 'content': 0.17629359662532806, 'timestamp': '2025-09-30 22:13:04.496127', 'step': 671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:04.527747', 'step': 671, 'epoch': 1} {'type': 'loss', 'content': 0.18348826467990875, 'timestamp': '2025-09-30 22:13:04.552184', 'step': 672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:04.593697', 'step': 672, 'epoch': 1} {'type': 'loss', 'content': 0.18579764664173126, 'timestamp': '2025-09-30 22:13:04.595807', 'step': 673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.626668', 'step': 673, 'epoch': 1} {'type': 'loss', 'content': 0.19240762293338776, 'timestamp': '2025-09-30 22:13:04.629908', 'step': 674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.662380', 'step': 674, 'epoch': 1} {'type': 'loss', 'content': 0.17627137899398804, 'timestamp': '2025-09-30 22:13:04.672866', 'step': 675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:04.702938', 'step': 675, 'epoch': 1} {'type': 'loss', 'content': 0.1847812980413437, 'timestamp': '2025-09-30 22:13:04.726775', 'step': 676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.761926', 'step': 676, 'epoch': 1} {'type': 'loss', 'content': 0.146523579955101, 'timestamp': '2025-09-30 22:13:04.770927', 'step': 677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:04.803354', 'step': 677, 'epoch': 1} {'type': 'loss', 'content': 0.3372059166431427, 'timestamp': '2025-09-30 22:13:04.812581', 'step': 678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:04.853823', 'step': 678, 'epoch': 1} {'type': 'loss', 'content': 0.1749565452337265, 'timestamp': '2025-09-30 22:13:04.856451', 'step': 679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:04.887087', 'step': 679, 'epoch': 1} {'type': 'loss', 'content': 0.1997258961200714, 'timestamp': '2025-09-30 22:13:04.912719', 'step': 680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:04.955548', 'step': 680, 'epoch': 1} {'type': 'loss', 'content': 0.18142694234848022, 'timestamp': '2025-09-30 22:13:04.959793', 'step': 681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:04.991448', 'step': 681, 'epoch': 1} {'type': 'loss', 'content': 0.1895354986190796, 'timestamp': '2025-09-30 22:13:04.994596', 'step': 682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.027846', 'step': 682, 'epoch': 1} {'type': 'loss', 'content': 0.18596740067005157, 'timestamp': '2025-09-30 22:13:05.031450', 'step': 683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.075408', 'step': 683, 'epoch': 1} {'type': 'loss', 'content': 0.15014971792697906, 'timestamp': '2025-09-30 22:13:05.099408', 'step': 684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.129880', 'step': 684, 'epoch': 1} {'type': 'loss', 'content': 0.2020583152770996, 'timestamp': '2025-09-30 22:13:05.141688', 'step': 685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.181321', 'step': 685, 'epoch': 1} {'type': 'loss', 'content': 0.21068459749221802, 'timestamp': '2025-09-30 22:13:05.192797', 'step': 686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.223918', 'step': 686, 'epoch': 1} {'type': 'loss', 'content': 0.19672401249408722, 'timestamp': '2025-09-30 22:13:05.226025', 'step': 687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:05.257182', 'step': 687, 'epoch': 1} {'type': 'loss', 'content': 0.14536800980567932, 'timestamp': '2025-09-30 22:13:05.284783', 'step': 688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.314842', 'step': 688, 'epoch': 1} {'type': 'loss', 'content': 0.2031601071357727, 'timestamp': '2025-09-30 22:13:05.321882', 'step': 689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:05.352416', 'step': 689, 'epoch': 1} {'type': 'loss', 'content': 0.24771317839622498, 'timestamp': '2025-09-30 22:13:05.357966', 'step': 690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.392258', 'step': 690, 'epoch': 1} {'type': 'loss', 'content': 0.17160305380821228, 'timestamp': '2025-09-30 22:13:05.399043', 'step': 691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.429406', 'step': 691, 'epoch': 1} {'type': 'loss', 'content': 0.20316454768180847, 'timestamp': '2025-09-30 22:13:05.454561', 'step': 692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.484727', 'step': 692, 'epoch': 1} {'type': 'loss', 'content': 0.22019138932228088, 'timestamp': '2025-09-30 22:13:05.491991', 'step': 693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.522161', 'step': 693, 'epoch': 1} {'type': 'loss', 'content': 0.2767007648944855, 'timestamp': '2025-09-30 22:13:05.524294', 'step': 694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:05.560660', 'step': 694, 'epoch': 1} {'type': 'loss', 'content': 0.2579320967197418, 'timestamp': '2025-09-30 22:13:05.562890', 'step': 695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:05.593295', 'step': 695, 'epoch': 1} {'type': 'loss', 'content': 0.15526336431503296, 'timestamp': '2025-09-30 22:13:05.618092', 'step': 696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.655320', 'step': 696, 'epoch': 1} {'type': 'loss', 'content': 0.21862748265266418, 'timestamp': '2025-09-30 22:13:05.658062', 'step': 697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:05.690965', 'step': 697, 'epoch': 1} {'type': 'loss', 'content': 0.19744479656219482, 'timestamp': '2025-09-30 22:13:05.693636', 'step': 698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.723698', 'step': 698, 'epoch': 1} {'type': 'loss', 'content': 0.22969062626361847, 'timestamp': '2025-09-30 22:13:05.729162', 'step': 699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.760958', 'step': 699, 'epoch': 1} {'type': 'loss', 'content': 0.09281985461711884, 'timestamp': '2025-09-30 22:13:05.785419', 'step': 700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:05.820458', 'step': 700, 'epoch': 1} {'type': 'loss', 'content': 0.2287636250257492, 'timestamp': '2025-09-30 22:13:05.827118', 'step': 701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:05.860483', 'step': 701, 'epoch': 1} {'type': 'loss', 'content': 0.1312755048274994, 'timestamp': '2025-09-30 22:13:05.871881', 'step': 702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:05.916408', 'step': 702, 'epoch': 1} {'type': 'loss', 'content': 0.20478124916553497, 'timestamp': '2025-09-30 22:13:05.919286', 'step': 703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:05.961166', 'step': 703, 'epoch': 1} {'type': 'loss', 'content': 0.19017009437084198, 'timestamp': '2025-09-30 22:13:05.990928', 'step': 704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:06.022104', 'step': 704, 'epoch': 1} {'type': 'loss', 'content': 0.22645211219787598, 'timestamp': '2025-09-30 22:13:06.027887', 'step': 705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.058387', 'step': 705, 'epoch': 1} {'type': 'loss', 'content': 0.24115674197673798, 'timestamp': '2025-09-30 22:13:06.071633', 'step': 706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:06.104472', 'step': 706, 'epoch': 1} {'type': 'loss', 'content': 0.2675936222076416, 'timestamp': '2025-09-30 22:13:06.114286', 'step': 707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.153641', 'step': 707, 'epoch': 1} {'type': 'loss', 'content': 0.18389736115932465, 'timestamp': '2025-09-30 22:13:06.199832', 'step': 708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:06.237160', 'step': 708, 'epoch': 1} {'type': 'loss', 'content': 0.14385779201984406, 'timestamp': '2025-09-30 22:13:06.253998', 'step': 709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.288329', 'step': 709, 'epoch': 1} {'type': 'loss', 'content': 0.18741095066070557, 'timestamp': '2025-09-30 22:13:06.298905', 'step': 710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.339736', 'step': 710, 'epoch': 1} {'type': 'loss', 'content': 0.20358870923519135, 'timestamp': '2025-09-30 22:13:06.356652', 'step': 711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:06.393462', 'step': 711, 'epoch': 1} {'type': 'loss', 'content': 0.1450665295124054, 'timestamp': '2025-09-30 22:13:06.421998', 'step': 712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:06.461705', 'step': 712, 'epoch': 1} {'type': 'loss', 'content': 0.21382588148117065, 'timestamp': '2025-09-30 22:13:06.475008', 'step': 713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.509322', 'step': 713, 'epoch': 1} {'type': 'loss', 'content': 0.1802804172039032, 'timestamp': '2025-09-30 22:13:06.515419', 'step': 714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.554964', 'step': 714, 'epoch': 1} {'type': 'loss', 'content': 0.16936326026916504, 'timestamp': '2025-09-30 22:13:06.572729', 'step': 715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:06.610565', 'step': 715, 'epoch': 1} {'type': 'loss', 'content': 0.19064083695411682, 'timestamp': '2025-09-30 22:13:06.649792', 'step': 716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:06.685108', 'step': 716, 'epoch': 1} {'type': 'loss', 'content': 0.13832303881645203, 'timestamp': '2025-09-30 22:13:06.697216', 'step': 717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.736660', 'step': 717, 'epoch': 1} {'type': 'loss', 'content': 0.2374248057603836, 'timestamp': '2025-09-30 22:13:06.746378', 'step': 718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:06.788636', 'step': 718, 'epoch': 1} {'type': 'loss', 'content': 0.22548849880695343, 'timestamp': '2025-09-30 22:13:06.796367', 'step': 719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:06.834751', 'step': 719, 'epoch': 1} {'type': 'loss', 'content': 0.1373426616191864, 'timestamp': '2025-09-30 22:13:06.869997', 'step': 720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:06.906528', 'step': 720, 'epoch': 1} {'type': 'loss', 'content': 0.11873012036085129, 'timestamp': '2025-09-30 22:13:06.909739', 'step': 721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:06.945425', 'step': 721, 'epoch': 1} {'type': 'loss', 'content': 0.1950964778661728, 'timestamp': '2025-09-30 22:13:06.963890', 'step': 722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.007523', 'step': 722, 'epoch': 1} {'type': 'loss', 'content': 0.2204851508140564, 'timestamp': '2025-09-30 22:13:07.013230', 'step': 723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:07.058280', 'step': 723, 'epoch': 1} {'type': 'loss', 'content': 0.0665564239025116, 'timestamp': '2025-09-30 22:13:07.086488', 'step': 724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.117174', 'step': 724, 'epoch': 1} {'type': 'loss', 'content': 0.21833020448684692, 'timestamp': '2025-09-30 22:13:07.120716', 'step': 725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.151927', 'step': 725, 'epoch': 1} {'type': 'loss', 'content': 0.2248961329460144, 'timestamp': '2025-09-30 22:13:07.155421', 'step': 726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.186000', 'step': 726, 'epoch': 1} {'type': 'loss', 'content': 0.14350943267345428, 'timestamp': '2025-09-30 22:13:07.188251', 'step': 727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.218532', 'step': 727, 'epoch': 1} {'type': 'loss', 'content': 0.18506576120853424, 'timestamp': '2025-09-30 22:13:07.248228', 'step': 728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.282825', 'step': 728, 'epoch': 1} {'type': 'loss', 'content': 0.18121623992919922, 'timestamp': '2025-09-30 22:13:07.285072', 'step': 729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:07.316460', 'step': 729, 'epoch': 1} {'type': 'loss', 'content': 0.17465704679489136, 'timestamp': '2025-09-30 22:13:07.324289', 'step': 730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.362699', 'step': 730, 'epoch': 1} {'type': 'loss', 'content': 0.22898346185684204, 'timestamp': '2025-09-30 22:13:07.366006', 'step': 731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:07.397595', 'step': 731, 'epoch': 1} {'type': 'loss', 'content': 0.14866165816783905, 'timestamp': '2025-09-30 22:13:07.427238', 'step': 732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.466445', 'step': 732, 'epoch': 1} {'type': 'loss', 'content': 0.21680662035942078, 'timestamp': '2025-09-30 22:13:07.470139', 'step': 733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:07.501165', 'step': 733, 'epoch': 1} {'type': 'loss', 'content': 0.224595308303833, 'timestamp': '2025-09-30 22:13:07.511879', 'step': 734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.541966', 'step': 734, 'epoch': 1} {'type': 'loss', 'content': 0.2975464463233948, 'timestamp': '2025-09-30 22:13:07.544746', 'step': 735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:07.575829', 'step': 735, 'epoch': 1} {'type': 'loss', 'content': 0.20496980845928192, 'timestamp': '2025-09-30 22:13:07.601591', 'step': 736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:07.632694', 'step': 736, 'epoch': 1} {'type': 'loss', 'content': 0.18084505200386047, 'timestamp': '2025-09-30 22:13:07.641412', 'step': 737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.673066', 'step': 737, 'epoch': 1} {'type': 'loss', 'content': 0.14118154346942902, 'timestamp': '2025-09-30 22:13:07.675444', 'step': 738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:07.706325', 'step': 738, 'epoch': 1} {'type': 'loss', 'content': 0.1584796905517578, 'timestamp': '2025-09-30 22:13:07.711650', 'step': 739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.741417', 'step': 739, 'epoch': 1} {'type': 'loss', 'content': 0.1151171550154686, 'timestamp': '2025-09-30 22:13:07.772648', 'step': 740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.811059', 'step': 740, 'epoch': 1} {'type': 'loss', 'content': 0.17061801254749298, 'timestamp': '2025-09-30 22:13:07.814413', 'step': 741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:07.858856', 'step': 741, 'epoch': 1} {'type': 'loss', 'content': 0.17035256326198578, 'timestamp': '2025-09-30 22:13:07.861878', 'step': 742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.892626', 'step': 742, 'epoch': 1} {'type': 'loss', 'content': 0.23964352905750275, 'timestamp': '2025-09-30 22:13:07.898833', 'step': 743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:07.932273', 'step': 743, 'epoch': 1} {'type': 'loss', 'content': 0.1937505006790161, 'timestamp': '2025-09-30 22:13:07.956196', 'step': 744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:07.990420', 'step': 744, 'epoch': 1} {'type': 'loss', 'content': 0.15235194563865662, 'timestamp': '2025-09-30 22:13:07.999340', 'step': 745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.031338', 'step': 745, 'epoch': 1} {'type': 'loss', 'content': 0.20518513023853302, 'timestamp': '2025-09-30 22:13:08.033682', 'step': 746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:08.064276', 'step': 746, 'epoch': 1} {'type': 'loss', 'content': 0.2071963995695114, 'timestamp': '2025-09-30 22:13:08.066588', 'step': 747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.109862', 'step': 747, 'epoch': 1} {'type': 'loss', 'content': 0.22625206410884857, 'timestamp': '2025-09-30 22:13:08.133718', 'step': 748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:08.165510', 'step': 748, 'epoch': 1} {'type': 'loss', 'content': 0.21646614372730255, 'timestamp': '2025-09-30 22:13:08.169417', 'step': 749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.199116', 'step': 749, 'epoch': 1} {'type': 'loss', 'content': 0.25931450724601746, 'timestamp': '2025-09-30 22:13:08.204423', 'step': 750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.237678', 'step': 750, 'epoch': 1} {'type': 'loss', 'content': 0.24555456638336182, 'timestamp': '2025-09-30 22:13:08.242524', 'step': 751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.273616', 'step': 751, 'epoch': 1} {'type': 'loss', 'content': 0.27080368995666504, 'timestamp': '2025-09-30 22:13:08.299065', 'step': 752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.329478', 'step': 752, 'epoch': 1} {'type': 'loss', 'content': 0.1902945339679718, 'timestamp': '2025-09-30 22:13:08.331728', 'step': 753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.362154', 'step': 753, 'epoch': 1} {'type': 'loss', 'content': 0.22428284585475922, 'timestamp': '2025-09-30 22:13:08.364913', 'step': 754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:08.394661', 'step': 754, 'epoch': 1} {'type': 'loss', 'content': 0.17582839727401733, 'timestamp': '2025-09-30 22:13:08.397308', 'step': 755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:08.427438', 'step': 755, 'epoch': 1} {'type': 'loss', 'content': 0.15312187373638153, 'timestamp': '2025-09-30 22:13:08.452078', 'step': 756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:08.484468', 'step': 756, 'epoch': 1} {'type': 'loss', 'content': 0.16605624556541443, 'timestamp': '2025-09-30 22:13:08.491304', 'step': 757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.526708', 'step': 757, 'epoch': 1} {'type': 'loss', 'content': 0.34036874771118164, 'timestamp': '2025-09-30 22:13:08.530348', 'step': 758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.564736', 'step': 758, 'epoch': 1} {'type': 'loss', 'content': 0.13970594108104706, 'timestamp': '2025-09-30 22:13:08.567252', 'step': 759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.597163', 'step': 759, 'epoch': 1} {'type': 'loss', 'content': 0.1422232985496521, 'timestamp': '2025-09-30 22:13:08.620832', 'step': 760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.653856', 'step': 760, 'epoch': 1} {'type': 'loss', 'content': 0.14313985407352448, 'timestamp': '2025-09-30 22:13:08.656809', 'step': 761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.690069', 'step': 761, 'epoch': 1} {'type': 'loss', 'content': 0.1487056314945221, 'timestamp': '2025-09-30 22:13:08.692669', 'step': 762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.727514', 'step': 762, 'epoch': 1} {'type': 'loss', 'content': 0.30844080448150635, 'timestamp': '2025-09-30 22:13:08.730538', 'step': 763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.767437', 'step': 763, 'epoch': 1} {'type': 'loss', 'content': 0.16448010504245758, 'timestamp': '2025-09-30 22:13:08.796043', 'step': 764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:08.827740', 'step': 764, 'epoch': 1} {'type': 'loss', 'content': 0.3126259446144104, 'timestamp': '2025-09-30 22:13:08.830622', 'step': 765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.860749', 'step': 765, 'epoch': 1} {'type': 'loss', 'content': 0.25515806674957275, 'timestamp': '2025-09-30 22:13:08.866039', 'step': 766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:08.899738', 'step': 766, 'epoch': 1} {'type': 'loss', 'content': 0.12416940927505493, 'timestamp': '2025-09-30 22:13:08.903191', 'step': 767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:08.934634', 'step': 767, 'epoch': 1} {'type': 'loss', 'content': 0.19505837559700012, 'timestamp': '2025-09-30 22:13:08.959631', 'step': 768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:08.990787', 'step': 768, 'epoch': 1} {'type': 'loss', 'content': 0.2275887131690979, 'timestamp': '2025-09-30 22:13:08.997946', 'step': 769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:09.027870', 'step': 769, 'epoch': 1} {'type': 'loss', 'content': 0.16720612347126007, 'timestamp': '2025-09-30 22:13:09.030703', 'step': 770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:09.060809', 'step': 770, 'epoch': 1} {'type': 'loss', 'content': 0.14623761177062988, 'timestamp': '2025-09-30 22:13:09.063130', 'step': 771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:09.093709', 'step': 771, 'epoch': 1} {'type': 'loss', 'content': 0.0854148268699646, 'timestamp': '2025-09-30 22:13:09.117821', 'step': 772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:09.147751', 'step': 772, 'epoch': 1} {'type': 'loss', 'content': 0.18032193183898926, 'timestamp': '2025-09-30 22:13:09.152735', 'step': 773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:09.183103', 'step': 773, 'epoch': 1} {'type': 'loss', 'content': 0.20784568786621094, 'timestamp': '2025-09-30 22:13:09.185683', 'step': 774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:09.215790', 'step': 774, 'epoch': 1} {'type': 'loss', 'content': 0.11834239214658737, 'timestamp': '2025-09-30 22:13:09.218421', 'step': 775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.250439', 'step': 775, 'epoch': 1} {'type': 'loss', 'content': 0.25984129309654236, 'timestamp': '2025-09-30 22:13:09.274532', 'step': 776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.309723', 'step': 776, 'epoch': 1} {'type': 'loss', 'content': 0.23127980530261993, 'timestamp': '2025-09-30 22:13:09.312352', 'step': 777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:09.342654', 'step': 777, 'epoch': 1} {'type': 'loss', 'content': 0.09431528300046921, 'timestamp': '2025-09-30 22:13:09.345763', 'step': 778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.381612', 'step': 778, 'epoch': 1} {'type': 'loss', 'content': 0.24557937681674957, 'timestamp': '2025-09-30 22:13:09.384761', 'step': 779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:09.416417', 'step': 779, 'epoch': 1} {'type': 'loss', 'content': 0.2465871423482895, 'timestamp': '2025-09-30 22:13:09.440928', 'step': 780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:09.471512', 'step': 780, 'epoch': 1} {'type': 'loss', 'content': 0.12695671617984772, 'timestamp': '2025-09-30 22:13:09.474703', 'step': 781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:09.507628', 'step': 781, 'epoch': 1} {'type': 'loss', 'content': 0.15790237486362457, 'timestamp': '2025-09-30 22:13:09.510509', 'step': 782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:09.543176', 'step': 782, 'epoch': 1} {'type': 'loss', 'content': 0.23603679239749908, 'timestamp': '2025-09-30 22:13:09.545912', 'step': 783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:09.575767', 'step': 783, 'epoch': 1} {'type': 'loss', 'content': 0.22563326358795166, 'timestamp': '2025-09-30 22:13:09.599511', 'step': 784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.629785', 'step': 784, 'epoch': 1} {'type': 'loss', 'content': 0.22109243273735046, 'timestamp': '2025-09-30 22:13:09.637214', 'step': 785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.673327', 'step': 785, 'epoch': 1} {'type': 'loss', 'content': 0.29254424571990967, 'timestamp': '2025-09-30 22:13:09.676367', 'step': 786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:09.712863', 'step': 786, 'epoch': 1} {'type': 'loss', 'content': 0.22656942903995514, 'timestamp': '2025-09-30 22:13:09.717738', 'step': 787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:09.749146', 'step': 787, 'epoch': 1} {'type': 'loss', 'content': 0.13395649194717407, 'timestamp': '2025-09-30 22:13:09.777109', 'step': 788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.808152', 'step': 788, 'epoch': 1} {'type': 'loss', 'content': 0.1639823019504547, 'timestamp': '2025-09-30 22:13:09.811434', 'step': 789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:09.841658', 'step': 789, 'epoch': 1} {'type': 'loss', 'content': 0.2915467321872711, 'timestamp': '2025-09-30 22:13:09.845026', 'step': 790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:09.876376', 'step': 790, 'epoch': 1} {'type': 'loss', 'content': 0.20364786684513092, 'timestamp': '2025-09-30 22:13:09.879988', 'step': 791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.909886', 'step': 791, 'epoch': 1} {'type': 'loss', 'content': 0.17508213222026825, 'timestamp': '2025-09-30 22:13:09.934504', 'step': 792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:09.963865', 'step': 792, 'epoch': 1} {'type': 'loss', 'content': 0.15176312625408173, 'timestamp': '2025-09-30 22:13:09.966191', 'step': 793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:09.996414', 'step': 793, 'epoch': 1} {'type': 'loss', 'content': 0.2879592180252075, 'timestamp': '2025-09-30 22:13:09.998861', 'step': 794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.029487', 'step': 794, 'epoch': 1} {'type': 'loss', 'content': 0.1543850153684616, 'timestamp': '2025-09-30 22:13:10.034885', 'step': 795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.064704', 'step': 795, 'epoch': 1} {'type': 'loss', 'content': 0.1570679396390915, 'timestamp': '2025-09-30 22:13:10.089100', 'step': 796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:10.118767', 'step': 796, 'epoch': 1} {'type': 'loss', 'content': 0.09650686383247375, 'timestamp': '2025-09-30 22:13:10.122007', 'step': 797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:10.151422', 'step': 797, 'epoch': 1} {'type': 'loss', 'content': 0.11848398298025131, 'timestamp': '2025-09-30 22:13:10.153591', 'step': 798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.182989', 'step': 798, 'epoch': 1} {'type': 'loss', 'content': 0.23738335072994232, 'timestamp': '2025-09-30 22:13:10.185292', 'step': 799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.214224', 'step': 799, 'epoch': 1} {'type': 'loss', 'content': 0.2786615192890167, 'timestamp': '2025-09-30 22:13:10.239813', 'step': 800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.269162', 'step': 800, 'epoch': 1} {'type': 'loss', 'content': 0.20513911545276642, 'timestamp': '2025-09-30 22:13:10.271734', 'step': 801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.301756', 'step': 801, 'epoch': 1} {'type': 'loss', 'content': 0.13376450538635254, 'timestamp': '2025-09-30 22:13:10.304028', 'step': 802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:10.335105', 'step': 802, 'epoch': 1} {'type': 'loss', 'content': 0.23297955095767975, 'timestamp': '2025-09-30 22:13:10.337363', 'step': 803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.377289', 'step': 803, 'epoch': 1} {'type': 'loss', 'content': 0.17391332983970642, 'timestamp': '2025-09-30 22:13:10.408054', 'step': 804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.437667', 'step': 804, 'epoch': 1} {'type': 'loss', 'content': 0.1324511021375656, 'timestamp': '2025-09-30 22:13:10.445320', 'step': 805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.475567', 'step': 805, 'epoch': 1} {'type': 'loss', 'content': 0.21559637784957886, 'timestamp': '2025-09-30 22:13:10.478403', 'step': 806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:10.509014', 'step': 806, 'epoch': 1} {'type': 'loss', 'content': 0.28719010949134827, 'timestamp': '2025-09-30 22:13:10.512670', 'step': 807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:10.547904', 'step': 807, 'epoch': 1} {'type': 'loss', 'content': 0.2015906274318695, 'timestamp': '2025-09-30 22:13:10.572068', 'step': 808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.601658', 'step': 808, 'epoch': 1} {'type': 'loss', 'content': 0.2182006984949112, 'timestamp': '2025-09-30 22:13:10.603857', 'step': 809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.633647', 'step': 809, 'epoch': 1} {'type': 'loss', 'content': 0.1903495043516159, 'timestamp': '2025-09-30 22:13:10.636899', 'step': 810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.667935', 'step': 810, 'epoch': 1} {'type': 'loss', 'content': 0.23558536171913147, 'timestamp': '2025-09-30 22:13:10.670413', 'step': 811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.699785', 'step': 811, 'epoch': 1} {'type': 'loss', 'content': 0.16707168519496918, 'timestamp': '2025-09-30 22:13:10.723838', 'step': 812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:10.757566', 'step': 812, 'epoch': 1} {'type': 'loss', 'content': 0.13932572305202484, 'timestamp': '2025-09-30 22:13:10.762697', 'step': 813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.793426', 'step': 813, 'epoch': 1} {'type': 'loss', 'content': 0.18415509164333344, 'timestamp': '2025-09-30 22:13:10.796477', 'step': 814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:10.829558', 'step': 814, 'epoch': 1} {'type': 'loss', 'content': 0.1851380467414856, 'timestamp': '2025-09-30 22:13:10.834138', 'step': 815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:10.863741', 'step': 815, 'epoch': 1} {'type': 'loss', 'content': 0.181866854429245, 'timestamp': '2025-09-30 22:13:10.895653', 'step': 816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.925493', 'step': 816, 'epoch': 1} {'type': 'loss', 'content': 0.1949991136789322, 'timestamp': '2025-09-30 22:13:10.928274', 'step': 817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:10.959140', 'step': 817, 'epoch': 1} {'type': 'loss', 'content': 0.1942109316587448, 'timestamp': '2025-09-30 22:13:10.962333', 'step': 818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:10.991866', 'step': 818, 'epoch': 1} {'type': 'loss', 'content': 0.18205873668193817, 'timestamp': '2025-09-30 22:13:10.993959', 'step': 819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:11.023441', 'step': 819, 'epoch': 1} {'type': 'loss', 'content': 0.1863226592540741, 'timestamp': '2025-09-30 22:13:11.047372', 'step': 820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:11.077810', 'step': 820, 'epoch': 1} {'type': 'loss', 'content': 0.27137574553489685, 'timestamp': '2025-09-30 22:13:11.082456', 'step': 821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:11.112421', 'step': 821, 'epoch': 1} {'type': 'loss', 'content': 0.16904211044311523, 'timestamp': '2025-09-30 22:13:11.114728', 'step': 822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.145572', 'step': 822, 'epoch': 1} {'type': 'loss', 'content': 0.14669105410575867, 'timestamp': '2025-09-30 22:13:11.147872', 'step': 823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.181189', 'step': 823, 'epoch': 1} {'type': 'loss', 'content': 0.14428730309009552, 'timestamp': '2025-09-30 22:13:11.215305', 'step': 824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:11.245748', 'step': 824, 'epoch': 1} {'type': 'loss', 'content': 0.25890347361564636, 'timestamp': '2025-09-30 22:13:11.251025', 'step': 825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:11.282024', 'step': 825, 'epoch': 1} {'type': 'loss', 'content': 0.14213170111179352, 'timestamp': '2025-09-30 22:13:11.285478', 'step': 826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.316936', 'step': 826, 'epoch': 1} {'type': 'loss', 'content': 0.3094318211078644, 'timestamp': '2025-09-30 22:13:11.320343', 'step': 827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:11.351956', 'step': 827, 'epoch': 1} {'type': 'loss', 'content': 0.24143578112125397, 'timestamp': '2025-09-30 22:13:11.376354', 'step': 828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:11.409153', 'step': 828, 'epoch': 1} {'type': 'loss', 'content': 0.13844098150730133, 'timestamp': '2025-09-30 22:13:11.411657', 'step': 829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:11.441556', 'step': 829, 'epoch': 1} {'type': 'loss', 'content': 0.14324048161506653, 'timestamp': '2025-09-30 22:13:11.446682', 'step': 830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.480854', 'step': 830, 'epoch': 1} {'type': 'loss', 'content': 0.34634238481521606, 'timestamp': '2025-09-30 22:13:11.483442', 'step': 831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.512948', 'step': 831, 'epoch': 1} {'type': 'loss', 'content': 0.1858944147825241, 'timestamp': '2025-09-30 22:13:11.537057', 'step': 832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.566321', 'step': 832, 'epoch': 1} {'type': 'loss', 'content': 0.12415161728858948, 'timestamp': '2025-09-30 22:13:11.568823', 'step': 833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:11.602063', 'step': 833, 'epoch': 1} {'type': 'loss', 'content': 0.15693186223506927, 'timestamp': '2025-09-30 22:13:11.604242', 'step': 834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:11.637708', 'step': 834, 'epoch': 1} {'type': 'loss', 'content': 0.1419062465429306, 'timestamp': '2025-09-30 22:13:11.650739', 'step': 835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:11.680806', 'step': 835, 'epoch': 1} {'type': 'loss', 'content': 0.16126781702041626, 'timestamp': '2025-09-30 22:13:11.704534', 'step': 836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.733529', 'step': 836, 'epoch': 1} {'type': 'loss', 'content': 0.18174561858177185, 'timestamp': '2025-09-30 22:13:11.735795', 'step': 837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:11.766126', 'step': 837, 'epoch': 1} {'type': 'loss', 'content': 0.20005615055561066, 'timestamp': '2025-09-30 22:13:11.768798', 'step': 838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:11.800284', 'step': 838, 'epoch': 1} {'type': 'loss', 'content': 0.2984674274921417, 'timestamp': '2025-09-30 22:13:11.802755', 'step': 839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.835099', 'step': 839, 'epoch': 1} {'type': 'loss', 'content': 0.18436625599861145, 'timestamp': '2025-09-30 22:13:11.859766', 'step': 840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:11.889550', 'step': 840, 'epoch': 1} {'type': 'loss', 'content': 0.174295574426651, 'timestamp': '2025-09-30 22:13:11.891876', 'step': 841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.921948', 'step': 841, 'epoch': 1} {'type': 'loss', 'content': 0.10989858955144882, 'timestamp': '2025-09-30 22:13:11.924541', 'step': 842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.954311', 'step': 842, 'epoch': 1} {'type': 'loss', 'content': 0.24155329167842865, 'timestamp': '2025-09-30 22:13:11.959823', 'step': 843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:11.989641', 'step': 843, 'epoch': 1} {'type': 'loss', 'content': 0.2410827875137329, 'timestamp': '2025-09-30 22:13:12.014551', 'step': 844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:12.044441', 'step': 844, 'epoch': 1} {'type': 'loss', 'content': 0.19527538120746613, 'timestamp': '2025-09-30 22:13:12.047443', 'step': 845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:12.077172', 'step': 845, 'epoch': 1} {'type': 'loss', 'content': 0.22193101048469543, 'timestamp': '2025-09-30 22:13:12.080524', 'step': 846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:12.110386', 'step': 846, 'epoch': 1} {'type': 'loss', 'content': 0.18162187933921814, 'timestamp': '2025-09-30 22:13:12.113567', 'step': 847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:12.143775', 'step': 847, 'epoch': 1} {'type': 'loss', 'content': 0.23735825717449188, 'timestamp': '2025-09-30 22:13:12.168741', 'step': 848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:12.199486', 'step': 848, 'epoch': 1} {'type': 'loss', 'content': 0.1443953961133957, 'timestamp': '2025-09-30 22:13:12.201636', 'step': 849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:12.234231', 'step': 849, 'epoch': 1} {'type': 'loss', 'content': 0.16794586181640625, 'timestamp': '2025-09-30 22:13:12.236917', 'step': 850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.266748', 'step': 850, 'epoch': 1} {'type': 'loss', 'content': 0.23637107014656067, 'timestamp': '2025-09-30 22:13:12.269358', 'step': 851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:12.302358', 'step': 851, 'epoch': 1} {'type': 'loss', 'content': 0.1598939448595047, 'timestamp': '2025-09-30 22:13:12.329413', 'step': 852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:12.358431', 'step': 852, 'epoch': 1} {'type': 'loss', 'content': 0.16262385249137878, 'timestamp': '2025-09-30 22:13:12.363367', 'step': 853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:12.397077', 'step': 853, 'epoch': 1} {'type': 'loss', 'content': 0.184085875749588, 'timestamp': '2025-09-30 22:13:12.408785', 'step': 854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.437917', 'step': 854, 'epoch': 1} {'type': 'loss', 'content': 0.17532919347286224, 'timestamp': '2025-09-30 22:13:12.441367', 'step': 855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:12.471064', 'step': 855, 'epoch': 1} {'type': 'loss', 'content': 0.15763607621192932, 'timestamp': '2025-09-30 22:13:12.502010', 'step': 856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:12.531940', 'step': 856, 'epoch': 1} {'type': 'loss', 'content': 0.29745781421661377, 'timestamp': '2025-09-30 22:13:12.541017', 'step': 857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:12.570895', 'step': 857, 'epoch': 1} {'type': 'loss', 'content': 0.2166944295167923, 'timestamp': '2025-09-30 22:13:12.573560', 'step': 858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.604081', 'step': 858, 'epoch': 1} {'type': 'loss', 'content': 0.12354064732789993, 'timestamp': '2025-09-30 22:13:12.608067', 'step': 859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:12.637142', 'step': 859, 'epoch': 1} {'type': 'loss', 'content': 0.19552023708820343, 'timestamp': '2025-09-30 22:13:12.661110', 'step': 860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:12.692018', 'step': 860, 'epoch': 1} {'type': 'loss', 'content': 0.1422344446182251, 'timestamp': '2025-09-30 22:13:12.694900', 'step': 861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.728286', 'step': 861, 'epoch': 1} {'type': 'loss', 'content': 0.07573089003562927, 'timestamp': '2025-09-30 22:13:12.730542', 'step': 862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:12.764202', 'step': 862, 'epoch': 1} {'type': 'loss', 'content': 0.2716498076915741, 'timestamp': '2025-09-30 22:13:12.766943', 'step': 863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:12.800625', 'step': 863, 'epoch': 1} {'type': 'loss', 'content': 0.14792872965335846, 'timestamp': '2025-09-30 22:13:12.826459', 'step': 864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.857779', 'step': 864, 'epoch': 1} {'type': 'loss', 'content': 0.18744684755802155, 'timestamp': '2025-09-30 22:13:12.860252', 'step': 865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.889028', 'step': 865, 'epoch': 1} {'type': 'loss', 'content': 0.2199234962463379, 'timestamp': '2025-09-30 22:13:12.891473', 'step': 866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:12.922599', 'step': 866, 'epoch': 1} {'type': 'loss', 'content': 0.18694756925106049, 'timestamp': '2025-09-30 22:13:12.925034', 'step': 867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:12.965038', 'step': 867, 'epoch': 1} {'type': 'loss', 'content': 0.22541958093643188, 'timestamp': '2025-09-30 22:13:12.991248', 'step': 868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:13.020845', 'step': 868, 'epoch': 1} {'type': 'loss', 'content': 0.12704747915267944, 'timestamp': '2025-09-30 22:13:13.024096', 'step': 869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:13.061680', 'step': 869, 'epoch': 1} {'type': 'loss', 'content': 0.2154698669910431, 'timestamp': '2025-09-30 22:13:13.064409', 'step': 870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:13.100613', 'step': 870, 'epoch': 1} {'type': 'loss', 'content': 0.11106130480766296, 'timestamp': '2025-09-30 22:13:13.109256', 'step': 871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:13.139393', 'step': 871, 'epoch': 1} {'type': 'loss', 'content': 0.15762558579444885, 'timestamp': '2025-09-30 22:13:13.165045', 'step': 872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:13.195812', 'step': 872, 'epoch': 1} {'type': 'loss', 'content': 0.22202543914318085, 'timestamp': '2025-09-30 22:13:13.207735', 'step': 873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.238327', 'step': 873, 'epoch': 1} {'type': 'loss', 'content': 0.2509634792804718, 'timestamp': '2025-09-30 22:13:13.241077', 'step': 874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:13.271987', 'step': 874, 'epoch': 1} {'type': 'loss', 'content': 0.20943057537078857, 'timestamp': '2025-09-30 22:13:13.274886', 'step': 875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.304625', 'step': 875, 'epoch': 1} {'type': 'loss', 'content': 0.17916877567768097, 'timestamp': '2025-09-30 22:13:13.328866', 'step': 876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:13.363942', 'step': 876, 'epoch': 1} {'type': 'loss', 'content': 0.18322399258613586, 'timestamp': '2025-09-30 22:13:13.371243', 'step': 877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:13.401118', 'step': 877, 'epoch': 1} {'type': 'loss', 'content': 0.1585473120212555, 'timestamp': '2025-09-30 22:13:13.403948', 'step': 878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.434864', 'step': 878, 'epoch': 1} {'type': 'loss', 'content': 0.16307349503040314, 'timestamp': '2025-09-30 22:13:13.438906', 'step': 879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.469860', 'step': 879, 'epoch': 1} {'type': 'loss', 'content': 0.14911329746246338, 'timestamp': '2025-09-30 22:13:13.494333', 'step': 880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:13.524398', 'step': 880, 'epoch': 1} {'type': 'loss', 'content': 0.13848423957824707, 'timestamp': '2025-09-30 22:13:13.531619', 'step': 881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:13.562004', 'step': 881, 'epoch': 1} {'type': 'loss', 'content': 0.23281362652778625, 'timestamp': '2025-09-30 22:13:13.565463', 'step': 882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:13.603304', 'step': 882, 'epoch': 1} {'type': 'loss', 'content': 0.17490854859352112, 'timestamp': '2025-09-30 22:13:13.606845', 'step': 883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:13.636301', 'step': 883, 'epoch': 1} {'type': 'loss', 'content': 0.21281683444976807, 'timestamp': '2025-09-30 22:13:13.661131', 'step': 884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.691306', 'step': 884, 'epoch': 1} {'type': 'loss', 'content': 0.1644420325756073, 'timestamp': '2025-09-30 22:13:13.696681', 'step': 885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:13.727592', 'step': 885, 'epoch': 1} {'type': 'loss', 'content': 0.3541659116744995, 'timestamp': '2025-09-30 22:13:13.740570', 'step': 886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:13.774538', 'step': 886, 'epoch': 1} {'type': 'loss', 'content': 0.2463274896144867, 'timestamp': '2025-09-30 22:13:13.777736', 'step': 887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.808020', 'step': 887, 'epoch': 1} {'type': 'loss', 'content': 0.27296245098114014, 'timestamp': '2025-09-30 22:13:13.832860', 'step': 888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.864422', 'step': 888, 'epoch': 1} {'type': 'loss', 'content': 0.2310267686843872, 'timestamp': '2025-09-30 22:13:13.885230', 'step': 889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:13.916630', 'step': 889, 'epoch': 1} {'type': 'loss', 'content': 0.12479200214147568, 'timestamp': '2025-09-30 22:13:13.920674', 'step': 890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:13.958714', 'step': 890, 'epoch': 1} {'type': 'loss', 'content': 0.2589215934276581, 'timestamp': '2025-09-30 22:13:13.963234', 'step': 891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:13.993151', 'step': 891, 'epoch': 1} {'type': 'loss', 'content': 0.18550075590610504, 'timestamp': '2025-09-30 22:13:14.019147', 'step': 892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.052673', 'step': 892, 'epoch': 1} {'type': 'loss', 'content': 0.14130626618862152, 'timestamp': '2025-09-30 22:13:14.062672', 'step': 893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.100146', 'step': 893, 'epoch': 1} {'type': 'loss', 'content': 0.1959240734577179, 'timestamp': '2025-09-30 22:13:14.110976', 'step': 894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:14.147566', 'step': 894, 'epoch': 1} {'type': 'loss', 'content': 0.13668352365493774, 'timestamp': '2025-09-30 22:13:14.151654', 'step': 895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:14.194143', 'step': 895, 'epoch': 1} {'type': 'loss', 'content': 0.21006622910499573, 'timestamp': '2025-09-30 22:13:14.220224', 'step': 896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:14.250046', 'step': 896, 'epoch': 1} {'type': 'loss', 'content': 0.18529056012630463, 'timestamp': '2025-09-30 22:13:14.253725', 'step': 897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.291626', 'step': 897, 'epoch': 1} {'type': 'loss', 'content': 0.16637203097343445, 'timestamp': '2025-09-30 22:13:14.295627', 'step': 898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.327031', 'step': 898, 'epoch': 1} {'type': 'loss', 'content': 0.24459148943424225, 'timestamp': '2025-09-30 22:13:14.330484', 'step': 899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:14.361146', 'step': 899, 'epoch': 1} {'type': 'loss', 'content': 0.365676611661911, 'timestamp': '2025-09-30 22:13:14.391608', 'step': 900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.429306', 'step': 900, 'epoch': 1} {'type': 'loss', 'content': 0.14659258723258972, 'timestamp': '2025-09-30 22:13:14.439640', 'step': 901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:14.473599', 'step': 901, 'epoch': 1} {'type': 'loss', 'content': 0.15375393629074097, 'timestamp': '2025-09-30 22:13:14.484993', 'step': 902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:14.521414', 'step': 902, 'epoch': 1} {'type': 'loss', 'content': 0.163889542222023, 'timestamp': '2025-09-30 22:13:14.529938', 'step': 903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:14.561384', 'step': 903, 'epoch': 1} {'type': 'loss', 'content': 0.20497487485408783, 'timestamp': '2025-09-30 22:13:14.586021', 'step': 904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:14.621992', 'step': 904, 'epoch': 1} {'type': 'loss', 'content': 0.18031929433345795, 'timestamp': '2025-09-30 22:13:14.625589', 'step': 905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:14.656295', 'step': 905, 'epoch': 1} {'type': 'loss', 'content': 0.20815321803092957, 'timestamp': '2025-09-30 22:13:14.668016', 'step': 906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:14.700906', 'step': 906, 'epoch': 1} {'type': 'loss', 'content': 0.140524223446846, 'timestamp': '2025-09-30 22:13:14.704418', 'step': 907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.736966', 'step': 907, 'epoch': 1} {'type': 'loss', 'content': 0.20680615305900574, 'timestamp': '2025-09-30 22:13:14.786785', 'step': 908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:14.820522', 'step': 908, 'epoch': 1} {'type': 'loss', 'content': 0.2776170074939728, 'timestamp': '2025-09-30 22:13:14.825340', 'step': 909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:14.857626', 'step': 909, 'epoch': 1} {'type': 'loss', 'content': 0.18381105363368988, 'timestamp': '2025-09-30 22:13:14.863347', 'step': 910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:14.898367', 'step': 910, 'epoch': 1} {'type': 'loss', 'content': 0.20945137739181519, 'timestamp': '2025-09-30 22:13:14.901595', 'step': 911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:14.932115', 'step': 911, 'epoch': 1} {'type': 'loss', 'content': 0.2152426391839981, 'timestamp': '2025-09-30 22:13:14.958807', 'step': 912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:14.999197', 'step': 912, 'epoch': 1} {'type': 'loss', 'content': 0.14669880270957947, 'timestamp': '2025-09-30 22:13:15.004705', 'step': 913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:15.037947', 'step': 913, 'epoch': 1} {'type': 'loss', 'content': 0.09224028885364532, 'timestamp': '2025-09-30 22:13:15.041547', 'step': 914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:13:15.077435', 'step': 914, 'epoch': 1} {'type': 'loss', 'content': 0.26004698872566223, 'timestamp': '2025-09-30 22:13:15.090085', 'step': 915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:15.120868', 'step': 915, 'epoch': 1} {'type': 'loss', 'content': 0.18175272643566132, 'timestamp': '2025-09-30 22:13:15.145912', 'step': 916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:15.177038', 'step': 916, 'epoch': 1} {'type': 'loss', 'content': 0.3017756938934326, 'timestamp': '2025-09-30 22:13:15.188335', 'step': 917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:15.224230', 'step': 917, 'epoch': 1} {'type': 'loss', 'content': 0.16903193295001984, 'timestamp': '2025-09-30 22:13:15.227516', 'step': 918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:15.270865', 'step': 918, 'epoch': 1} {'type': 'loss', 'content': 0.30809351801872253, 'timestamp': '2025-09-30 22:13:15.279894', 'step': 919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:15.318674', 'step': 919, 'epoch': 1} {'type': 'loss', 'content': 0.12236611545085907, 'timestamp': '2025-09-30 22:13:15.343489', 'step': 920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:15.375924', 'step': 920, 'epoch': 1} {'type': 'loss', 'content': 0.23840515315532684, 'timestamp': '2025-09-30 22:13:15.382108', 'step': 921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:15.412818', 'step': 921, 'epoch': 1} {'type': 'loss', 'content': 0.16837048530578613, 'timestamp': '2025-09-30 22:13:15.421288', 'step': 922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:15.453426', 'step': 922, 'epoch': 1} {'type': 'loss', 'content': 0.18205730617046356, 'timestamp': '2025-09-30 22:13:15.455431', 'step': 923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:15.487013', 'step': 923, 'epoch': 1} {'type': 'loss', 'content': 0.279255211353302, 'timestamp': '2025-09-30 22:13:15.511080', 'step': 924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:15.542489', 'step': 924, 'epoch': 1} {'type': 'loss', 'content': 0.1558484584093094, 'timestamp': '2025-09-30 22:13:15.546588', 'step': 925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:15.582842', 'step': 925, 'epoch': 1} {'type': 'loss', 'content': 0.25280627608299255, 'timestamp': '2025-09-30 22:13:15.586882', 'step': 926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:15.617805', 'step': 926, 'epoch': 1} {'type': 'loss', 'content': 0.26987454295158386, 'timestamp': '2025-09-30 22:13:15.620889', 'step': 927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:15.650852', 'step': 927, 'epoch': 1} {'type': 'loss', 'content': 0.1696050465106964, 'timestamp': '2025-09-30 22:13:15.675614', 'step': 928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:15.705508', 'step': 928, 'epoch': 1} {'type': 'loss', 'content': 0.15426132082939148, 'timestamp': '2025-09-30 22:13:15.708228', 'step': 929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:15.739061', 'step': 929, 'epoch': 1} {'type': 'loss', 'content': 0.1899596005678177, 'timestamp': '2025-09-30 22:13:15.741682', 'step': 930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:15.772605', 'step': 930, 'epoch': 1} {'type': 'loss', 'content': 0.15752114355564117, 'timestamp': '2025-09-30 22:13:15.776230', 'step': 931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:15.806605', 'step': 931, 'epoch': 1} {'type': 'loss', 'content': 0.32578036189079285, 'timestamp': '2025-09-30 22:13:15.831764', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:13:23.151641', 'step': 932, 'epoch': 1} {'type': 'pplx', 'content': 7091.001736438246, 'timestamp': '2025-09-30 22:13:23.156016', 'step': 932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.188876', 'step': 932, 'epoch': 1} {'type': 'loss', 'content': 0.1203814372420311, 'timestamp': '2025-09-30 22:13:23.196107', 'step': 933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.228109', 'step': 933, 'epoch': 1} {'type': 'loss', 'content': 0.14400213956832886, 'timestamp': '2025-09-30 22:13:23.235453', 'step': 934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.265456', 'step': 934, 'epoch': 1} {'type': 'loss', 'content': 0.2717946171760559, 'timestamp': '2025-09-30 22:13:23.272011', 'step': 935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.303740', 'step': 935, 'epoch': 1} {'type': 'loss', 'content': 0.14287447929382324, 'timestamp': '2025-09-30 22:13:23.328401', 'step': 936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.365255', 'step': 936, 'epoch': 1} {'type': 'loss', 'content': 0.24024222791194916, 'timestamp': '2025-09-30 22:13:23.372055', 'step': 937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:23.404795', 'step': 937, 'epoch': 1} {'type': 'loss', 'content': 0.2967231869697571, 'timestamp': '2025-09-30 22:13:23.414634', 'step': 938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:23.451297', 'step': 938, 'epoch': 1} {'type': 'loss', 'content': 0.22545289993286133, 'timestamp': '2025-09-30 22:13:23.455931', 'step': 939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.485581', 'step': 939, 'epoch': 1} {'type': 'loss', 'content': 0.22476264834403992, 'timestamp': '2025-09-30 22:13:23.510674', 'step': 940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.542552', 'step': 940, 'epoch': 1} {'type': 'loss', 'content': 0.15332089364528656, 'timestamp': '2025-09-30 22:13:23.547373', 'step': 941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.579242', 'step': 941, 'epoch': 1} {'type': 'loss', 'content': 0.2484113723039627, 'timestamp': '2025-09-30 22:13:23.586131', 'step': 942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.623047', 'step': 942, 'epoch': 1} {'type': 'loss', 'content': 0.21518845856189728, 'timestamp': '2025-09-30 22:13:23.626164', 'step': 943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.657128', 'step': 943, 'epoch': 1} {'type': 'loss', 'content': 0.14866505563259125, 'timestamp': '2025-09-30 22:13:23.681998', 'step': 944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.717424', 'step': 944, 'epoch': 1} {'type': 'loss', 'content': 0.18940013647079468, 'timestamp': '2025-09-30 22:13:23.720547', 'step': 945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.752633', 'step': 945, 'epoch': 1} {'type': 'loss', 'content': 0.1197379007935524, 'timestamp': '2025-09-30 22:13:23.764906', 'step': 946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.795312', 'step': 946, 'epoch': 1} {'type': 'loss', 'content': 0.25865983963012695, 'timestamp': '2025-09-30 22:13:23.807087', 'step': 947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:23.838271', 'step': 947, 'epoch': 1} {'type': 'loss', 'content': 0.2357165366411209, 'timestamp': '2025-09-30 22:13:23.863932', 'step': 948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:23.901540', 'step': 948, 'epoch': 1} {'type': 'loss', 'content': 0.20902833342552185, 'timestamp': '2025-09-30 22:13:23.909340', 'step': 949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:23.946130', 'step': 949, 'epoch': 1} {'type': 'loss', 'content': 0.1692659705877304, 'timestamp': '2025-09-30 22:13:23.954296', 'step': 950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:23.985741', 'step': 950, 'epoch': 1} {'type': 'loss', 'content': 0.22713316977024078, 'timestamp': '2025-09-30 22:13:23.993023', 'step': 951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:24.026738', 'step': 951, 'epoch': 1} {'type': 'loss', 'content': 0.1888333261013031, 'timestamp': '2025-09-30 22:13:24.051089', 'step': 952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.084037', 'step': 952, 'epoch': 1} {'type': 'loss', 'content': 0.11811203509569168, 'timestamp': '2025-09-30 22:13:24.087255', 'step': 953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:24.120806', 'step': 953, 'epoch': 1} {'type': 'loss', 'content': 0.129314586520195, 'timestamp': '2025-09-30 22:13:24.123576', 'step': 954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.155169', 'step': 954, 'epoch': 1} {'type': 'loss', 'content': 0.1466420739889145, 'timestamp': '2025-09-30 22:13:24.160855', 'step': 955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.193245', 'step': 955, 'epoch': 1} {'type': 'loss', 'content': 0.12530545890331268, 'timestamp': '2025-09-30 22:13:24.217608', 'step': 956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.251911', 'step': 956, 'epoch': 1} {'type': 'loss', 'content': 0.2058342695236206, 'timestamp': '2025-09-30 22:13:24.256702', 'step': 957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:24.290552', 'step': 957, 'epoch': 1} {'type': 'loss', 'content': 0.1910347193479538, 'timestamp': '2025-09-30 22:13:24.294164', 'step': 958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.324604', 'step': 958, 'epoch': 1} {'type': 'loss', 'content': 0.24331718683242798, 'timestamp': '2025-09-30 22:13:24.326821', 'step': 959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.357823', 'step': 959, 'epoch': 1} {'type': 'loss', 'content': 0.16904316842556, 'timestamp': '2025-09-30 22:13:24.383776', 'step': 960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.413954', 'step': 960, 'epoch': 1} {'type': 'loss', 'content': 0.15420056879520416, 'timestamp': '2025-09-30 22:13:24.415924', 'step': 961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:24.447220', 'step': 961, 'epoch': 1} {'type': 'loss', 'content': 0.1732543557882309, 'timestamp': '2025-09-30 22:13:24.450262', 'step': 962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.482767', 'step': 962, 'epoch': 1} {'type': 'loss', 'content': 0.19092348217964172, 'timestamp': '2025-09-30 22:13:24.485763', 'step': 963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.517125', 'step': 963, 'epoch': 1} {'type': 'loss', 'content': 0.25076398253440857, 'timestamp': '2025-09-30 22:13:24.541796', 'step': 964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:24.577207', 'step': 964, 'epoch': 1} {'type': 'loss', 'content': 0.1985359489917755, 'timestamp': '2025-09-30 22:13:24.579853', 'step': 965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.611405', 'step': 965, 'epoch': 1} {'type': 'loss', 'content': 0.21456265449523926, 'timestamp': '2025-09-30 22:13:24.614489', 'step': 966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:24.646724', 'step': 966, 'epoch': 1} {'type': 'loss', 'content': 0.20608286559581757, 'timestamp': '2025-09-30 22:13:24.649921', 'step': 967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:24.681662', 'step': 967, 'epoch': 1} {'type': 'loss', 'content': 0.2529113292694092, 'timestamp': '2025-09-30 22:13:24.712791', 'step': 968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.747480', 'step': 968, 'epoch': 1} {'type': 'loss', 'content': 0.16507165133953094, 'timestamp': '2025-09-30 22:13:24.751295', 'step': 969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:24.786026', 'step': 969, 'epoch': 1} {'type': 'loss', 'content': 0.27214983105659485, 'timestamp': '2025-09-30 22:13:24.793665', 'step': 970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:24.829524', 'step': 970, 'epoch': 1} {'type': 'loss', 'content': 0.15627096593379974, 'timestamp': '2025-09-30 22:13:24.834206', 'step': 971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:24.874354', 'step': 971, 'epoch': 1} {'type': 'loss', 'content': 0.2794053852558136, 'timestamp': '2025-09-30 22:13:24.898839', 'step': 972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:24.931614', 'step': 972, 'epoch': 1} {'type': 'loss', 'content': 0.1976337879896164, 'timestamp': '2025-09-30 22:13:24.939179', 'step': 973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:24.974059', 'step': 973, 'epoch': 1} {'type': 'loss', 'content': 0.15199783444404602, 'timestamp': '2025-09-30 22:13:24.981343', 'step': 974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.019388', 'step': 974, 'epoch': 1} {'type': 'loss', 'content': 0.24570363759994507, 'timestamp': '2025-09-30 22:13:25.023110', 'step': 975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:25.054378', 'step': 975, 'epoch': 1} {'type': 'loss', 'content': 0.1690618395805359, 'timestamp': '2025-09-30 22:13:25.078782', 'step': 976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.108887', 'step': 976, 'epoch': 1} {'type': 'loss', 'content': 0.18338052928447723, 'timestamp': '2025-09-30 22:13:25.111254', 'step': 977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:25.142514', 'step': 977, 'epoch': 1} {'type': 'loss', 'content': 0.2140326201915741, 'timestamp': '2025-09-30 22:13:25.149778', 'step': 978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.184378', 'step': 978, 'epoch': 1} {'type': 'loss', 'content': 0.23347659409046173, 'timestamp': '2025-09-30 22:13:25.186522', 'step': 979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:25.217543', 'step': 979, 'epoch': 1} {'type': 'loss', 'content': 0.19080504775047302, 'timestamp': '2025-09-30 22:13:25.242486', 'step': 980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:25.272498', 'step': 980, 'epoch': 1} {'type': 'loss', 'content': 0.12832167744636536, 'timestamp': '2025-09-30 22:13:25.275336', 'step': 981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.305170', 'step': 981, 'epoch': 1} {'type': 'loss', 'content': 0.1333688497543335, 'timestamp': '2025-09-30 22:13:25.306950', 'step': 982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:25.342505', 'step': 982, 'epoch': 1} {'type': 'loss', 'content': 0.13714489340782166, 'timestamp': '2025-09-30 22:13:25.345113', 'step': 983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:25.375670', 'step': 983, 'epoch': 1} {'type': 'loss', 'content': 0.3192492425441742, 'timestamp': '2025-09-30 22:13:25.399205', 'step': 984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.430409', 'step': 984, 'epoch': 1} {'type': 'loss', 'content': 0.17796620726585388, 'timestamp': '2025-09-30 22:13:25.433282', 'step': 985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.465814', 'step': 985, 'epoch': 1} {'type': 'loss', 'content': 0.08630508184432983, 'timestamp': '2025-09-30 22:13:25.468129', 'step': 986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.500960', 'step': 986, 'epoch': 1} {'type': 'loss', 'content': 0.148395374417305, 'timestamp': '2025-09-30 22:13:25.510129', 'step': 987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:25.542493', 'step': 987, 'epoch': 1} {'type': 'loss', 'content': 0.25397807359695435, 'timestamp': '2025-09-30 22:13:25.566900', 'step': 988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.610837', 'step': 988, 'epoch': 1} {'type': 'loss', 'content': 0.22818361222743988, 'timestamp': '2025-09-30 22:13:25.615611', 'step': 989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.652111', 'step': 989, 'epoch': 1} {'type': 'loss', 'content': 0.1658000648021698, 'timestamp': '2025-09-30 22:13:25.656354', 'step': 990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.687842', 'step': 990, 'epoch': 1} {'type': 'loss', 'content': 0.10952649265527725, 'timestamp': '2025-09-30 22:13:25.690760', 'step': 991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.720651', 'step': 991, 'epoch': 1} {'type': 'loss', 'content': 0.18838900327682495, 'timestamp': '2025-09-30 22:13:25.744312', 'step': 992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.776241', 'step': 992, 'epoch': 1} {'type': 'loss', 'content': 0.20261023938655853, 'timestamp': '2025-09-30 22:13:25.780411', 'step': 993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:25.815482', 'step': 993, 'epoch': 1} {'type': 'loss', 'content': 0.24009160697460175, 'timestamp': '2025-09-30 22:13:25.819204', 'step': 994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:25.849842', 'step': 994, 'epoch': 1} {'type': 'loss', 'content': 0.15652281045913696, 'timestamp': '2025-09-30 22:13:25.852972', 'step': 995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:25.884918', 'step': 995, 'epoch': 1} {'type': 'loss', 'content': 0.22941608726978302, 'timestamp': '2025-09-30 22:13:25.914664', 'step': 996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.946590', 'step': 996, 'epoch': 1} {'type': 'loss', 'content': 0.17103278636932373, 'timestamp': '2025-09-30 22:13:25.951832', 'step': 997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:25.983827', 'step': 997, 'epoch': 1} {'type': 'loss', 'content': 0.19695605337619781, 'timestamp': '2025-09-30 22:13:25.993254', 'step': 998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:26.028423', 'step': 998, 'epoch': 1} {'type': 'loss', 'content': 0.18226340413093567, 'timestamp': '2025-09-30 22:13:26.033910', 'step': 999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:26.063555', 'step': 999, 'epoch': 1} {'type': 'loss', 'content': 0.14511637389659882, 'timestamp': '2025-09-30 22:13:26.091873', 'step': 1000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1000', 'timestamp': '2025-09-30 22:13:31.111624', 'step': 1000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.143241', 'step': 1000, 'epoch': 1} {'type': 'loss', 'content': 0.18434776365756989, 'timestamp': '2025-09-30 22:13:31.146491', 'step': 1001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.176890', 'step': 1001, 'epoch': 1} {'type': 'loss', 'content': 0.23223015666007996, 'timestamp': '2025-09-30 22:13:31.180703', 'step': 1002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.211102', 'step': 1002, 'epoch': 1} {'type': 'loss', 'content': 0.3000710904598236, 'timestamp': '2025-09-30 22:13:31.219031', 'step': 1003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.254765', 'step': 1003, 'epoch': 1} {'type': 'loss', 'content': 0.1519753485918045, 'timestamp': '2025-09-30 22:13:31.278975', 'step': 1004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.313649', 'step': 1004, 'epoch': 1} {'type': 'loss', 'content': 0.19199545681476593, 'timestamp': '2025-09-30 22:13:31.324216', 'step': 1005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.356936', 'step': 1005, 'epoch': 1} {'type': 'loss', 'content': 0.16067633032798767, 'timestamp': '2025-09-30 22:13:31.365788', 'step': 1006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.397757', 'step': 1006, 'epoch': 1} {'type': 'loss', 'content': 0.20598337054252625, 'timestamp': '2025-09-30 22:13:31.405715', 'step': 1007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.439674', 'step': 1007, 'epoch': 1} {'type': 'loss', 'content': 0.14770208299160004, 'timestamp': '2025-09-30 22:13:31.464761', 'step': 1008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:31.495662', 'step': 1008, 'epoch': 1} {'type': 'loss', 'content': 0.23487213253974915, 'timestamp': '2025-09-30 22:13:31.498413', 'step': 1009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.529562', 'step': 1009, 'epoch': 1} {'type': 'loss', 'content': 0.12739382684230804, 'timestamp': '2025-09-30 22:13:31.532332', 'step': 1010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:31.562705', 'step': 1010, 'epoch': 1} {'type': 'loss', 'content': 0.23875117301940918, 'timestamp': '2025-09-30 22:13:31.565702', 'step': 1011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:31.596569', 'step': 1011, 'epoch': 1} {'type': 'loss', 'content': 0.2027519941329956, 'timestamp': '2025-09-30 22:13:31.620512', 'step': 1012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:31.650897', 'step': 1012, 'epoch': 1} {'type': 'loss', 'content': 0.23768456280231476, 'timestamp': '2025-09-30 22:13:31.653482', 'step': 1013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.689789', 'step': 1013, 'epoch': 1} {'type': 'loss', 'content': 0.18861831724643707, 'timestamp': '2025-09-30 22:13:31.692349', 'step': 1014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.724331', 'step': 1014, 'epoch': 1} {'type': 'loss', 'content': 0.12127860635519028, 'timestamp': '2025-09-30 22:13:31.727961', 'step': 1015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.759389', 'step': 1015, 'epoch': 1} {'type': 'loss', 'content': 0.3015111982822418, 'timestamp': '2025-09-30 22:13:31.784145', 'step': 1016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.819660', 'step': 1016, 'epoch': 1} {'type': 'loss', 'content': 0.14769881963729858, 'timestamp': '2025-09-30 22:13:31.823509', 'step': 1017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:31.854647', 'step': 1017, 'epoch': 1} {'type': 'loss', 'content': 0.12570759654045105, 'timestamp': '2025-09-30 22:13:31.859724', 'step': 1018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:31.892074', 'step': 1018, 'epoch': 1} {'type': 'loss', 'content': 0.15051746368408203, 'timestamp': '2025-09-30 22:13:31.895289', 'step': 1019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.926412', 'step': 1019, 'epoch': 1} {'type': 'loss', 'content': 0.21238796412944794, 'timestamp': '2025-09-30 22:13:31.956989', 'step': 1020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:31.990100', 'step': 1020, 'epoch': 1} {'type': 'loss', 'content': 0.17331747710704803, 'timestamp': '2025-09-30 22:13:31.995397', 'step': 1021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.030897', 'step': 1021, 'epoch': 1} {'type': 'loss', 'content': 0.2815437316894531, 'timestamp': '2025-09-30 22:13:32.034855', 'step': 1022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:32.066711', 'step': 1022, 'epoch': 1} {'type': 'loss', 'content': 0.18487021327018738, 'timestamp': '2025-09-30 22:13:32.069696', 'step': 1023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.101308', 'step': 1023, 'epoch': 1} {'type': 'loss', 'content': 0.160420224070549, 'timestamp': '2025-09-30 22:13:32.130624', 'step': 1024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:32.163484', 'step': 1024, 'epoch': 1} {'type': 'loss', 'content': 0.1291353404521942, 'timestamp': '2025-09-30 22:13:32.166982', 'step': 1025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:32.197781', 'step': 1025, 'epoch': 1} {'type': 'loss', 'content': 0.20265012979507446, 'timestamp': '2025-09-30 22:13:32.204137', 'step': 1026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.235330', 'step': 1026, 'epoch': 1} {'type': 'loss', 'content': 0.149691641330719, 'timestamp': '2025-09-30 22:13:32.238577', 'step': 1027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.271943', 'step': 1027, 'epoch': 1} {'type': 'loss', 'content': 0.2663230001926422, 'timestamp': '2025-09-30 22:13:32.310119', 'step': 1028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.352188', 'step': 1028, 'epoch': 1} {'type': 'loss', 'content': 0.2531779706478119, 'timestamp': '2025-09-30 22:13:32.378867', 'step': 1029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:32.431025', 'step': 1029, 'epoch': 1} {'type': 'loss', 'content': 0.14689940214157104, 'timestamp': '2025-09-30 22:13:32.456015', 'step': 1030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:32.502990', 'step': 1030, 'epoch': 1} {'type': 'loss', 'content': 0.2984031140804291, 'timestamp': '2025-09-30 22:13:32.527785', 'step': 1031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.578655', 'step': 1031, 'epoch': 1} {'type': 'loss', 'content': 0.18704986572265625, 'timestamp': '2025-09-30 22:13:32.635024', 'step': 1032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:32.675591', 'step': 1032, 'epoch': 1} {'type': 'loss', 'content': 0.274787038564682, 'timestamp': '2025-09-30 22:13:32.685759', 'step': 1033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:32.720642', 'step': 1033, 'epoch': 1} {'type': 'loss', 'content': 0.14398959279060364, 'timestamp': '2025-09-30 22:13:32.735514', 'step': 1034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:32.776927', 'step': 1034, 'epoch': 1} {'type': 'loss', 'content': 0.28725162148475647, 'timestamp': '2025-09-30 22:13:32.785311', 'step': 1035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:32.829322', 'step': 1035, 'epoch': 1} {'type': 'loss', 'content': 0.25605571269989014, 'timestamp': '2025-09-30 22:13:32.858095', 'step': 1036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:32.897524', 'step': 1036, 'epoch': 1} {'type': 'loss', 'content': 0.21717704832553864, 'timestamp': '2025-09-30 22:13:32.907649', 'step': 1037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:32.953104', 'step': 1037, 'epoch': 1} {'type': 'loss', 'content': 0.15588076412677765, 'timestamp': '2025-09-30 22:13:32.971424', 'step': 1038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:33.007328', 'step': 1038, 'epoch': 1} {'type': 'loss', 'content': 0.12609124183654785, 'timestamp': '2025-09-30 22:13:33.023139', 'step': 1039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:33.062130', 'step': 1039, 'epoch': 1} {'type': 'loss', 'content': 0.212763711810112, 'timestamp': '2025-09-30 22:13:33.103167', 'step': 1040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:33.141316', 'step': 1040, 'epoch': 1} {'type': 'loss', 'content': 0.23766498267650604, 'timestamp': '2025-09-30 22:13:33.150948', 'step': 1041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:33.196662', 'step': 1041, 'epoch': 1} {'type': 'loss', 'content': 0.23600651323795319, 'timestamp': '2025-09-30 22:13:33.229025', 'step': 1042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:33.275452', 'step': 1042, 'epoch': 1} {'type': 'loss', 'content': 0.15666070580482483, 'timestamp': '2025-09-30 22:13:33.285895', 'step': 1043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:33.332575', 'step': 1043, 'epoch': 1} {'type': 'loss', 'content': 0.22384166717529297, 'timestamp': '2025-09-30 22:13:33.372077', 'step': 1044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:33.416544', 'step': 1044, 'epoch': 1} {'type': 'loss', 'content': 0.15897344052791595, 'timestamp': '2025-09-30 22:13:33.435876', 'step': 1045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:33.478116', 'step': 1045, 'epoch': 1} {'type': 'loss', 'content': 0.10952581465244293, 'timestamp': '2025-09-30 22:13:33.485577', 'step': 1046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:33.521662', 'step': 1046, 'epoch': 1} {'type': 'loss', 'content': 0.21190568804740906, 'timestamp': '2025-09-30 22:13:33.536101', 'step': 1047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:33.579657', 'step': 1047, 'epoch': 1} {'type': 'loss', 'content': 0.22960320115089417, 'timestamp': '2025-09-30 22:13:33.611667', 'step': 1048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:33.663327', 'step': 1048, 'epoch': 1} {'type': 'loss', 'content': 0.24153220653533936, 'timestamp': '2025-09-30 22:13:33.668198', 'step': 1049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:33.711220', 'step': 1049, 'epoch': 1} {'type': 'loss', 'content': 0.1756487786769867, 'timestamp': '2025-09-30 22:13:33.723273', 'step': 1050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:33.759259', 'step': 1050, 'epoch': 1} {'type': 'loss', 'content': 0.13685034215450287, 'timestamp': '2025-09-30 22:13:33.770726', 'step': 1051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:33.807949', 'step': 1051, 'epoch': 1} {'type': 'loss', 'content': 0.15220779180526733, 'timestamp': '2025-09-30 22:13:33.836810', 'step': 1052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:33.875485', 'step': 1052, 'epoch': 1} {'type': 'loss', 'content': 0.1754404455423355, 'timestamp': '2025-09-30 22:13:33.891813', 'step': 1053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:33.931606', 'step': 1053, 'epoch': 1} {'type': 'loss', 'content': 0.15411663055419922, 'timestamp': '2025-09-30 22:13:33.979022', 'step': 1054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.013324', 'step': 1054, 'epoch': 1} {'type': 'loss', 'content': 0.1355644166469574, 'timestamp': '2025-09-30 22:13:34.021549', 'step': 1055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:34.070362', 'step': 1055, 'epoch': 1} {'type': 'loss', 'content': 0.24040278792381287, 'timestamp': '2025-09-30 22:13:34.096470', 'step': 1056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.133112', 'step': 1056, 'epoch': 1} {'type': 'loss', 'content': 0.27000871300697327, 'timestamp': '2025-09-30 22:13:34.136135', 'step': 1057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.174520', 'step': 1057, 'epoch': 1} {'type': 'loss', 'content': 0.16769516468048096, 'timestamp': '2025-09-30 22:13:34.178465', 'step': 1058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.209669', 'step': 1058, 'epoch': 1} {'type': 'loss', 'content': 0.20495696365833282, 'timestamp': '2025-09-30 22:13:34.212739', 'step': 1059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:34.248066', 'step': 1059, 'epoch': 1} {'type': 'loss', 'content': 0.2576328217983246, 'timestamp': '2025-09-30 22:13:34.272635', 'step': 1060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.304976', 'step': 1060, 'epoch': 1} {'type': 'loss', 'content': 0.31895726919174194, 'timestamp': '2025-09-30 22:13:34.308104', 'step': 1061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.338151', 'step': 1061, 'epoch': 1} {'type': 'loss', 'content': 0.143524169921875, 'timestamp': '2025-09-30 22:13:34.341385', 'step': 1062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.373975', 'step': 1062, 'epoch': 1} {'type': 'loss', 'content': 0.16636191308498383, 'timestamp': '2025-09-30 22:13:34.377116', 'step': 1063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.407706', 'step': 1063, 'epoch': 1} {'type': 'loss', 'content': 0.16693811118602753, 'timestamp': '2025-09-30 22:13:34.438871', 'step': 1064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.474383', 'step': 1064, 'epoch': 1} {'type': 'loss', 'content': 0.1827581226825714, 'timestamp': '2025-09-30 22:13:34.478160', 'step': 1065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.508015', 'step': 1065, 'epoch': 1} {'type': 'loss', 'content': 0.2062283456325531, 'timestamp': '2025-09-30 22:13:34.514298', 'step': 1066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:34.545740', 'step': 1066, 'epoch': 1} {'type': 'loss', 'content': 0.20672985911369324, 'timestamp': '2025-09-30 22:13:34.549455', 'step': 1067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.583822', 'step': 1067, 'epoch': 1} {'type': 'loss', 'content': 0.1756483018398285, 'timestamp': '2025-09-30 22:13:34.609716', 'step': 1068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.640198', 'step': 1068, 'epoch': 1} {'type': 'loss', 'content': 0.14742428064346313, 'timestamp': '2025-09-30 22:13:34.647580', 'step': 1069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.678095', 'step': 1069, 'epoch': 1} {'type': 'loss', 'content': 0.18612238764762878, 'timestamp': '2025-09-30 22:13:34.680976', 'step': 1070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.717077', 'step': 1070, 'epoch': 1} {'type': 'loss', 'content': 0.22938425838947296, 'timestamp': '2025-09-30 22:13:34.725024', 'step': 1071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:34.755550', 'step': 1071, 'epoch': 1} {'type': 'loss', 'content': 0.20581211149692535, 'timestamp': '2025-09-30 22:13:34.779608', 'step': 1072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.810802', 'step': 1072, 'epoch': 1} {'type': 'loss', 'content': 0.17908841371536255, 'timestamp': '2025-09-30 22:13:34.814003', 'step': 1073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.847354', 'step': 1073, 'epoch': 1} {'type': 'loss', 'content': 0.18607808649539948, 'timestamp': '2025-09-30 22:13:34.850602', 'step': 1074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:34.881098', 'step': 1074, 'epoch': 1} {'type': 'loss', 'content': 0.17291976511478424, 'timestamp': '2025-09-30 22:13:34.890918', 'step': 1075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.921132', 'step': 1075, 'epoch': 1} {'type': 'loss', 'content': 0.12377041578292847, 'timestamp': '2025-09-30 22:13:34.945288', 'step': 1076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:34.983270', 'step': 1076, 'epoch': 1} {'type': 'loss', 'content': 0.2579899728298187, 'timestamp': '2025-09-30 22:13:34.985749', 'step': 1077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:35.016701', 'step': 1077, 'epoch': 1} {'type': 'loss', 'content': 0.1856401115655899, 'timestamp': '2025-09-30 22:13:35.023278', 'step': 1078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.057615', 'step': 1078, 'epoch': 1} {'type': 'loss', 'content': 0.1084197387099266, 'timestamp': '2025-09-30 22:13:35.067233', 'step': 1079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.098526', 'step': 1079, 'epoch': 1} {'type': 'loss', 'content': 0.2571263909339905, 'timestamp': '2025-09-30 22:13:35.128188', 'step': 1080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:35.159030', 'step': 1080, 'epoch': 1} {'type': 'loss', 'content': 0.13511069118976593, 'timestamp': '2025-09-30 22:13:35.166626', 'step': 1081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.205173', 'step': 1081, 'epoch': 1} {'type': 'loss', 'content': 0.14418692886829376, 'timestamp': '2025-09-30 22:13:35.207721', 'step': 1082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:35.245134', 'step': 1082, 'epoch': 1} {'type': 'loss', 'content': 0.25628870725631714, 'timestamp': '2025-09-30 22:13:35.249143', 'step': 1083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.279568', 'step': 1083, 'epoch': 1} {'type': 'loss', 'content': 0.09135326743125916, 'timestamp': '2025-09-30 22:13:35.303610', 'step': 1084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:35.337737', 'step': 1084, 'epoch': 1} {'type': 'loss', 'content': 0.22491709887981415, 'timestamp': '2025-09-30 22:13:35.347450', 'step': 1085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:35.379360', 'step': 1085, 'epoch': 1} {'type': 'loss', 'content': 0.1841215342283249, 'timestamp': '2025-09-30 22:13:35.385720', 'step': 1086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.417663', 'step': 1086, 'epoch': 1} {'type': 'loss', 'content': 0.18736563622951508, 'timestamp': '2025-09-30 22:13:35.424575', 'step': 1087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:35.455161', 'step': 1087, 'epoch': 1} {'type': 'loss', 'content': 0.2447938323020935, 'timestamp': '2025-09-30 22:13:35.480521', 'step': 1088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:35.510616', 'step': 1088, 'epoch': 1} {'type': 'loss', 'content': 0.12230326235294342, 'timestamp': '2025-09-30 22:13:35.513131', 'step': 1089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:35.548879', 'step': 1089, 'epoch': 1} {'type': 'loss', 'content': 0.15354503691196442, 'timestamp': '2025-09-30 22:13:35.557536', 'step': 1090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:35.587715', 'step': 1090, 'epoch': 1} {'type': 'loss', 'content': 0.1339503973722458, 'timestamp': '2025-09-30 22:13:35.590465', 'step': 1091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:35.623184', 'step': 1091, 'epoch': 1} {'type': 'loss', 'content': 0.15076634287834167, 'timestamp': '2025-09-30 22:13:35.647434', 'step': 1092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:35.677216', 'step': 1092, 'epoch': 1} {'type': 'loss', 'content': 0.1732509285211563, 'timestamp': '2025-09-30 22:13:35.680650', 'step': 1093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:35.710506', 'step': 1093, 'epoch': 1} {'type': 'loss', 'content': 0.14935888350009918, 'timestamp': '2025-09-30 22:13:35.713631', 'step': 1094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:35.743790', 'step': 1094, 'epoch': 1} {'type': 'loss', 'content': 0.20086027681827545, 'timestamp': '2025-09-30 22:13:35.748735', 'step': 1095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.782032', 'step': 1095, 'epoch': 1} {'type': 'loss', 'content': 0.15869775414466858, 'timestamp': '2025-09-30 22:13:35.805994', 'step': 1096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:35.840110', 'step': 1096, 'epoch': 1} {'type': 'loss', 'content': 0.24835939705371857, 'timestamp': '2025-09-30 22:13:35.844755', 'step': 1097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:35.877983', 'step': 1097, 'epoch': 1} {'type': 'loss', 'content': 0.2195540964603424, 'timestamp': '2025-09-30 22:13:35.882017', 'step': 1098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:35.913761', 'step': 1098, 'epoch': 1} {'type': 'loss', 'content': 0.16732832789421082, 'timestamp': '2025-09-30 22:13:35.918303', 'step': 1099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:35.950785', 'step': 1099, 'epoch': 1} {'type': 'loss', 'content': 0.15951694548130035, 'timestamp': '2025-09-30 22:13:35.978042', 'step': 1100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:36.009004', 'step': 1100, 'epoch': 1} {'type': 'loss', 'content': 0.12001221626996994, 'timestamp': '2025-09-30 22:13:36.014008', 'step': 1101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.050567', 'step': 1101, 'epoch': 1} {'type': 'loss', 'content': 0.13753663003444672, 'timestamp': '2025-09-30 22:13:36.054167', 'step': 1102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:36.084448', 'step': 1102, 'epoch': 1} {'type': 'loss', 'content': 0.2165374904870987, 'timestamp': '2025-09-30 22:13:36.091710', 'step': 1103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:36.122477', 'step': 1103, 'epoch': 1} {'type': 'loss', 'content': 0.13996560871601105, 'timestamp': '2025-09-30 22:13:36.150401', 'step': 1104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.180358', 'step': 1104, 'epoch': 1} {'type': 'loss', 'content': 0.23366497457027435, 'timestamp': '2025-09-30 22:13:36.185001', 'step': 1105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.214863', 'step': 1105, 'epoch': 1} {'type': 'loss', 'content': 0.16150319576263428, 'timestamp': '2025-09-30 22:13:36.222058', 'step': 1106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:36.252637', 'step': 1106, 'epoch': 1} {'type': 'loss', 'content': 0.2225053608417511, 'timestamp': '2025-09-30 22:13:36.262479', 'step': 1107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:36.299360', 'step': 1107, 'epoch': 1} {'type': 'loss', 'content': 0.23443524539470673, 'timestamp': '2025-09-30 22:13:36.328525', 'step': 1108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.367836', 'step': 1108, 'epoch': 1} {'type': 'loss', 'content': 0.16286489367485046, 'timestamp': '2025-09-30 22:13:36.374460', 'step': 1109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:36.409652', 'step': 1109, 'epoch': 1} {'type': 'loss', 'content': 0.15863940119743347, 'timestamp': '2025-09-30 22:13:36.413495', 'step': 1110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:36.444641', 'step': 1110, 'epoch': 1} {'type': 'loss', 'content': 0.1542147696018219, 'timestamp': '2025-09-30 22:13:36.447455', 'step': 1111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:36.478849', 'step': 1111, 'epoch': 1} {'type': 'loss', 'content': 0.21268926560878754, 'timestamp': '2025-09-30 22:13:36.506814', 'step': 1112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:36.536505', 'step': 1112, 'epoch': 1} {'type': 'loss', 'content': 0.19600048661231995, 'timestamp': '2025-09-30 22:13:36.542132', 'step': 1113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:36.571821', 'step': 1113, 'epoch': 1} {'type': 'loss', 'content': 0.16490019857883453, 'timestamp': '2025-09-30 22:13:36.574527', 'step': 1114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:36.607553', 'step': 1114, 'epoch': 1} {'type': 'loss', 'content': 0.12334819883108139, 'timestamp': '2025-09-30 22:13:36.612652', 'step': 1115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.644600', 'step': 1115, 'epoch': 1} {'type': 'loss', 'content': 0.14572837948799133, 'timestamp': '2025-09-30 22:13:36.671287', 'step': 1116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.704428', 'step': 1116, 'epoch': 1} {'type': 'loss', 'content': 0.2016456574201584, 'timestamp': '2025-09-30 22:13:36.706808', 'step': 1117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:36.737233', 'step': 1117, 'epoch': 1} {'type': 'loss', 'content': 0.2114088535308838, 'timestamp': '2025-09-30 22:13:36.746577', 'step': 1118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.784034', 'step': 1118, 'epoch': 1} {'type': 'loss', 'content': 0.20521457493305206, 'timestamp': '2025-09-30 22:13:36.786642', 'step': 1119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:36.821267', 'step': 1119, 'epoch': 1} {'type': 'loss', 'content': 0.14103612303733826, 'timestamp': '2025-09-30 22:13:36.845158', 'step': 1120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:36.878615', 'step': 1120, 'epoch': 1} {'type': 'loss', 'content': 0.23665006458759308, 'timestamp': '2025-09-30 22:13:36.883760', 'step': 1121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.916779', 'step': 1121, 'epoch': 1} {'type': 'loss', 'content': 0.2019800990819931, 'timestamp': '2025-09-30 22:13:36.919496', 'step': 1122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:36.958104', 'step': 1122, 'epoch': 1} {'type': 'loss', 'content': 0.14354044198989868, 'timestamp': '2025-09-30 22:13:36.961504', 'step': 1123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:36.994924', 'step': 1123, 'epoch': 1} {'type': 'loss', 'content': 0.19898127019405365, 'timestamp': '2025-09-30 22:13:37.025948', 'step': 1124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:37.061682', 'step': 1124, 'epoch': 1} {'type': 'loss', 'content': 0.1404489427804947, 'timestamp': '2025-09-30 22:13:37.070713', 'step': 1125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.101164', 'step': 1125, 'epoch': 1} {'type': 'loss', 'content': 0.1499619036912918, 'timestamp': '2025-09-30 22:13:37.111479', 'step': 1126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:37.146503', 'step': 1126, 'epoch': 1} {'type': 'loss', 'content': 0.18285977840423584, 'timestamp': '2025-09-30 22:13:37.149553', 'step': 1127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.183711', 'step': 1127, 'epoch': 1} {'type': 'loss', 'content': 0.22259072959423065, 'timestamp': '2025-09-30 22:13:37.207757', 'step': 1128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.242279', 'step': 1128, 'epoch': 1} {'type': 'loss', 'content': 0.11543814837932587, 'timestamp': '2025-09-30 22:13:37.244753', 'step': 1129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:37.281477', 'step': 1129, 'epoch': 1} {'type': 'loss', 'content': 0.21779361367225647, 'timestamp': '2025-09-30 22:13:37.287994', 'step': 1130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:37.318078', 'step': 1130, 'epoch': 1} {'type': 'loss', 'content': 0.25522181391716003, 'timestamp': '2025-09-30 22:13:37.321126', 'step': 1131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.351164', 'step': 1131, 'epoch': 1} {'type': 'loss', 'content': 0.15116196870803833, 'timestamp': '2025-09-30 22:13:37.375263', 'step': 1132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:37.405050', 'step': 1132, 'epoch': 1} {'type': 'loss', 'content': 0.263317346572876, 'timestamp': '2025-09-30 22:13:37.413187', 'step': 1133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:37.448528', 'step': 1133, 'epoch': 1} {'type': 'loss', 'content': 0.15829916298389435, 'timestamp': '2025-09-30 22:13:37.450861', 'step': 1134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:37.480783', 'step': 1134, 'epoch': 1} {'type': 'loss', 'content': 0.3271110951900482, 'timestamp': '2025-09-30 22:13:37.491388', 'step': 1135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:37.521902', 'step': 1135, 'epoch': 1} {'type': 'loss', 'content': 0.2789473831653595, 'timestamp': '2025-09-30 22:13:37.550149', 'step': 1136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.586838', 'step': 1136, 'epoch': 1} {'type': 'loss', 'content': 0.16617661714553833, 'timestamp': '2025-09-30 22:13:37.593922', 'step': 1137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.630056', 'step': 1137, 'epoch': 1} {'type': 'loss', 'content': 0.18964163959026337, 'timestamp': '2025-09-30 22:13:37.632794', 'step': 1138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:37.667186', 'step': 1138, 'epoch': 1} {'type': 'loss', 'content': 0.18654638528823853, 'timestamp': '2025-09-30 22:13:37.670462', 'step': 1139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:37.701118', 'step': 1139, 'epoch': 1} {'type': 'loss', 'content': 0.09195096045732498, 'timestamp': '2025-09-30 22:13:37.725311', 'step': 1140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:37.755670', 'step': 1140, 'epoch': 1} {'type': 'loss', 'content': 0.18730811774730682, 'timestamp': '2025-09-30 22:13:37.762370', 'step': 1141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:37.796712', 'step': 1141, 'epoch': 1} {'type': 'loss', 'content': 0.234475240111351, 'timestamp': '2025-09-30 22:13:37.799010', 'step': 1142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:37.833045', 'step': 1142, 'epoch': 1} {'type': 'loss', 'content': 0.1254623681306839, 'timestamp': '2025-09-30 22:13:37.835744', 'step': 1143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:37.875134', 'step': 1143, 'epoch': 1} {'type': 'loss', 'content': 0.21480773389339447, 'timestamp': '2025-09-30 22:13:37.899529', 'step': 1144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:37.934169', 'step': 1144, 'epoch': 1} {'type': 'loss', 'content': 0.17897020280361176, 'timestamp': '2025-09-30 22:13:37.936711', 'step': 1145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:37.971914', 'step': 1145, 'epoch': 1} {'type': 'loss', 'content': 0.12704841792583466, 'timestamp': '2025-09-30 22:13:37.976962', 'step': 1146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.007104', 'step': 1146, 'epoch': 1} {'type': 'loss', 'content': 0.1753150224685669, 'timestamp': '2025-09-30 22:13:38.009337', 'step': 1147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.040300', 'step': 1147, 'epoch': 1} {'type': 'loss', 'content': 0.2580123245716095, 'timestamp': '2025-09-30 22:13:38.063802', 'step': 1148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.093909', 'step': 1148, 'epoch': 1} {'type': 'loss', 'content': 0.2517792582511902, 'timestamp': '2025-09-30 22:13:38.096342', 'step': 1149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.128335', 'step': 1149, 'epoch': 1} {'type': 'loss', 'content': 0.20795293152332306, 'timestamp': '2025-09-30 22:13:38.132271', 'step': 1150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.165154', 'step': 1150, 'epoch': 1} {'type': 'loss', 'content': 0.1060749962925911, 'timestamp': '2025-09-30 22:13:38.168750', 'step': 1151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.203037', 'step': 1151, 'epoch': 1} {'type': 'loss', 'content': 0.16592805087566376, 'timestamp': '2025-09-30 22:13:38.228788', 'step': 1152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.259774', 'step': 1152, 'epoch': 1} {'type': 'loss', 'content': 0.1579970419406891, 'timestamp': '2025-09-30 22:13:38.263760', 'step': 1153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.303158', 'step': 1153, 'epoch': 1} {'type': 'loss', 'content': 0.13879640400409698, 'timestamp': '2025-09-30 22:13:38.307547', 'step': 1154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.338678', 'step': 1154, 'epoch': 1} {'type': 'loss', 'content': 0.17016816139221191, 'timestamp': '2025-09-30 22:13:38.341824', 'step': 1155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:13:38.378355', 'step': 1155, 'epoch': 1} {'type': 'loss', 'content': 0.21965011954307556, 'timestamp': '2025-09-30 22:13:38.406738', 'step': 1156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:38.436613', 'step': 1156, 'epoch': 1} {'type': 'loss', 'content': 0.21343611180782318, 'timestamp': '2025-09-30 22:13:38.440773', 'step': 1157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:38.471318', 'step': 1157, 'epoch': 1} {'type': 'loss', 'content': 0.20592033863067627, 'timestamp': '2025-09-30 22:13:38.475119', 'step': 1158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.505039', 'step': 1158, 'epoch': 1} {'type': 'loss', 'content': 0.1840248554944992, 'timestamp': '2025-09-30 22:13:38.508804', 'step': 1159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:38.539694', 'step': 1159, 'epoch': 1} {'type': 'loss', 'content': 0.16993650794029236, 'timestamp': '2025-09-30 22:13:38.567919', 'step': 1160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.598374', 'step': 1160, 'epoch': 1} {'type': 'loss', 'content': 0.1246262937784195, 'timestamp': '2025-09-30 22:13:38.602731', 'step': 1161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:38.634795', 'step': 1161, 'epoch': 1} {'type': 'loss', 'content': 0.25416699051856995, 'timestamp': '2025-09-30 22:13:38.637246', 'step': 1162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:38.671859', 'step': 1162, 'epoch': 1} {'type': 'loss', 'content': 0.16295842826366425, 'timestamp': '2025-09-30 22:13:38.674249', 'step': 1163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:38.704514', 'step': 1163, 'epoch': 1} {'type': 'loss', 'content': 0.1194312572479248, 'timestamp': '2025-09-30 22:13:38.728536', 'step': 1164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.762043', 'step': 1164, 'epoch': 1} {'type': 'loss', 'content': 0.14730197191238403, 'timestamp': '2025-09-30 22:13:38.767067', 'step': 1165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.798088', 'step': 1165, 'epoch': 1} {'type': 'loss', 'content': 0.14088499546051025, 'timestamp': '2025-09-30 22:13:38.801347', 'step': 1166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.831813', 'step': 1166, 'epoch': 1} {'type': 'loss', 'content': 0.17322520911693573, 'timestamp': '2025-09-30 22:13:38.834616', 'step': 1167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:38.864653', 'step': 1167, 'epoch': 1} {'type': 'loss', 'content': 0.21757157146930695, 'timestamp': '2025-09-30 22:13:38.889381', 'step': 1168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.921715', 'step': 1168, 'epoch': 1} {'type': 'loss', 'content': 0.17509539425373077, 'timestamp': '2025-09-30 22:13:38.924337', 'step': 1169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:38.955341', 'step': 1169, 'epoch': 1} {'type': 'loss', 'content': 0.19240471720695496, 'timestamp': '2025-09-30 22:13:38.958498', 'step': 1170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:38.991618', 'step': 1170, 'epoch': 1} {'type': 'loss', 'content': 0.1609620302915573, 'timestamp': '2025-09-30 22:13:39.001366', 'step': 1171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:39.035606', 'step': 1171, 'epoch': 1} {'type': 'loss', 'content': 0.19033437967300415, 'timestamp': '2025-09-30 22:13:39.067958', 'step': 1172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:39.099073', 'step': 1172, 'epoch': 1} {'type': 'loss', 'content': 0.1639460325241089, 'timestamp': '2025-09-30 22:13:39.105335', 'step': 1173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:39.139307', 'step': 1173, 'epoch': 1} {'type': 'loss', 'content': 0.1677083820104599, 'timestamp': '2025-09-30 22:13:39.141713', 'step': 1174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.173106', 'step': 1174, 'epoch': 1} {'type': 'loss', 'content': 0.1478254795074463, 'timestamp': '2025-09-30 22:13:39.180339', 'step': 1175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:39.215578', 'step': 1175, 'epoch': 1} {'type': 'loss', 'content': 0.20817407965660095, 'timestamp': '2025-09-30 22:13:39.239782', 'step': 1176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:39.270338', 'step': 1176, 'epoch': 1} {'type': 'loss', 'content': 0.17012520134449005, 'timestamp': '2025-09-30 22:13:39.275377', 'step': 1177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:39.305898', 'step': 1177, 'epoch': 1} {'type': 'loss', 'content': 0.2587254047393799, 'timestamp': '2025-09-30 22:13:39.308718', 'step': 1178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.350703', 'step': 1178, 'epoch': 1} {'type': 'loss', 'content': 0.13600941002368927, 'timestamp': '2025-09-30 22:13:39.353441', 'step': 1179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.385764', 'step': 1179, 'epoch': 1} {'type': 'loss', 'content': 0.2031494677066803, 'timestamp': '2025-09-30 22:13:39.411253', 'step': 1180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:39.441401', 'step': 1180, 'epoch': 1} {'type': 'loss', 'content': 0.19613203406333923, 'timestamp': '2025-09-30 22:13:39.448748', 'step': 1181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:39.483691', 'step': 1181, 'epoch': 1} {'type': 'loss', 'content': 0.1928146481513977, 'timestamp': '2025-09-30 22:13:39.488814', 'step': 1182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.520314', 'step': 1182, 'epoch': 1} {'type': 'loss', 'content': 0.1773553490638733, 'timestamp': '2025-09-30 22:13:39.524552', 'step': 1183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:39.566578', 'step': 1183, 'epoch': 1} {'type': 'loss', 'content': 0.12322969734668732, 'timestamp': '2025-09-30 22:13:39.593134', 'step': 1184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:39.625892', 'step': 1184, 'epoch': 1} {'type': 'loss', 'content': 0.11237020045518875, 'timestamp': '2025-09-30 22:13:39.629468', 'step': 1185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.661606', 'step': 1185, 'epoch': 1} {'type': 'loss', 'content': 0.159790500998497, 'timestamp': '2025-09-30 22:13:39.664313', 'step': 1186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:39.695937', 'step': 1186, 'epoch': 1} {'type': 'loss', 'content': 0.2550894021987915, 'timestamp': '2025-09-30 22:13:39.703269', 'step': 1187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.738370', 'step': 1187, 'epoch': 1} {'type': 'loss', 'content': 0.20038515329360962, 'timestamp': '2025-09-30 22:13:39.765825', 'step': 1188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.801049', 'step': 1188, 'epoch': 1} {'type': 'loss', 'content': 0.12249310314655304, 'timestamp': '2025-09-30 22:13:39.803603', 'step': 1189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:39.834003', 'step': 1189, 'epoch': 1} {'type': 'loss', 'content': 0.15973709523677826, 'timestamp': '2025-09-30 22:13:39.840022', 'step': 1190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:39.874837', 'step': 1190, 'epoch': 1} {'type': 'loss', 'content': 0.193465456366539, 'timestamp': '2025-09-30 22:13:39.885569', 'step': 1191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:39.923069', 'step': 1191, 'epoch': 1} {'type': 'loss', 'content': 0.09709418565034866, 'timestamp': '2025-09-30 22:13:39.953122', 'step': 1192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:39.992146', 'step': 1192, 'epoch': 1} {'type': 'loss', 'content': 0.1572749763727188, 'timestamp': '2025-09-30 22:13:39.994822', 'step': 1193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.026826', 'step': 1193, 'epoch': 1} {'type': 'loss', 'content': 0.24182868003845215, 'timestamp': '2025-09-30 22:13:40.029975', 'step': 1194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.068582', 'step': 1194, 'epoch': 1} {'type': 'loss', 'content': 0.1929669827222824, 'timestamp': '2025-09-30 22:13:40.071676', 'step': 1195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.102185', 'step': 1195, 'epoch': 1} {'type': 'loss', 'content': 0.20435231924057007, 'timestamp': '2025-09-30 22:13:40.126813', 'step': 1196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.158325', 'step': 1196, 'epoch': 1} {'type': 'loss', 'content': 0.24709342420101166, 'timestamp': '2025-09-30 22:13:40.161279', 'step': 1197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.191860', 'step': 1197, 'epoch': 1} {'type': 'loss', 'content': 0.1474473625421524, 'timestamp': '2025-09-30 22:13:40.194894', 'step': 1198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.232654', 'step': 1198, 'epoch': 1} {'type': 'loss', 'content': 0.14378570020198822, 'timestamp': '2025-09-30 22:13:40.236009', 'step': 1199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:40.267073', 'step': 1199, 'epoch': 1} {'type': 'loss', 'content': 0.1324489563703537, 'timestamp': '2025-09-30 22:13:40.295842', 'step': 1200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.330867', 'step': 1200, 'epoch': 1} {'type': 'loss', 'content': 0.1502130627632141, 'timestamp': '2025-09-30 22:13:40.337400', 'step': 1201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:40.370424', 'step': 1201, 'epoch': 1} {'type': 'loss', 'content': 0.17968137562274933, 'timestamp': '2025-09-30 22:13:40.377106', 'step': 1202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:40.415189', 'step': 1202, 'epoch': 1} {'type': 'loss', 'content': 0.19790375232696533, 'timestamp': '2025-09-30 22:13:40.419377', 'step': 1203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:40.451414', 'step': 1203, 'epoch': 1} {'type': 'loss', 'content': 0.18997623026371002, 'timestamp': '2025-09-30 22:13:40.480253', 'step': 1204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.515468', 'step': 1204, 'epoch': 1} {'type': 'loss', 'content': 0.15336033701896667, 'timestamp': '2025-09-30 22:13:40.522569', 'step': 1205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:40.552709', 'step': 1205, 'epoch': 1} {'type': 'loss', 'content': 0.23977093398571014, 'timestamp': '2025-09-30 22:13:40.555394', 'step': 1206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.585659', 'step': 1206, 'epoch': 1} {'type': 'loss', 'content': 0.17550896108150482, 'timestamp': '2025-09-30 22:13:40.589311', 'step': 1207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.619571', 'step': 1207, 'epoch': 1} {'type': 'loss', 'content': 0.14231431484222412, 'timestamp': '2025-09-30 22:13:40.644392', 'step': 1208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:40.674186', 'step': 1208, 'epoch': 1} {'type': 'loss', 'content': 0.10784076154232025, 'timestamp': '2025-09-30 22:13:40.678234', 'step': 1209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:40.709932', 'step': 1209, 'epoch': 1} {'type': 'loss', 'content': 0.22867295145988464, 'timestamp': '2025-09-30 22:13:40.714604', 'step': 1210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.745870', 'step': 1210, 'epoch': 1} {'type': 'loss', 'content': 0.1593831330537796, 'timestamp': '2025-09-30 22:13:40.748332', 'step': 1211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:13:40.782261', 'step': 1211, 'epoch': 1} {'type': 'loss', 'content': 0.19309990108013153, 'timestamp': '2025-09-30 22:13:40.810294', 'step': 1212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:40.843172', 'step': 1212, 'epoch': 1} {'type': 'loss', 'content': 0.14232778549194336, 'timestamp': '2025-09-30 22:13:40.848094', 'step': 1213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:40.886053', 'step': 1213, 'epoch': 1} {'type': 'loss', 'content': 0.14673657715320587, 'timestamp': '2025-09-30 22:13:40.893563', 'step': 1214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.928699', 'step': 1214, 'epoch': 1} {'type': 'loss', 'content': 0.14522995054721832, 'timestamp': '2025-09-30 22:13:40.938760', 'step': 1215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:40.974685', 'step': 1215, 'epoch': 1} {'type': 'loss', 'content': 0.14394035935401917, 'timestamp': '2025-09-30 22:13:40.998649', 'step': 1216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.028510', 'step': 1216, 'epoch': 1} {'type': 'loss', 'content': 0.17561060190200806, 'timestamp': '2025-09-30 22:13:41.031064', 'step': 1217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.067923', 'step': 1217, 'epoch': 1} {'type': 'loss', 'content': 0.15931373834609985, 'timestamp': '2025-09-30 22:13:41.071600', 'step': 1218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:41.102344', 'step': 1218, 'epoch': 1} {'type': 'loss', 'content': 0.19655300676822662, 'timestamp': '2025-09-30 22:13:41.105133', 'step': 1219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:41.138174', 'step': 1219, 'epoch': 1} {'type': 'loss', 'content': 0.12700961530208588, 'timestamp': '2025-09-30 22:13:41.162982', 'step': 1220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:41.192605', 'step': 1220, 'epoch': 1} {'type': 'loss', 'content': 0.1774924248456955, 'timestamp': '2025-09-30 22:13:41.195807', 'step': 1221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:41.229570', 'step': 1221, 'epoch': 1} {'type': 'loss', 'content': 0.12119899690151215, 'timestamp': '2025-09-30 22:13:41.232611', 'step': 1222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.273678', 'step': 1222, 'epoch': 1} {'type': 'loss', 'content': 0.19798864424228668, 'timestamp': '2025-09-30 22:13:41.276619', 'step': 1223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:41.309936', 'step': 1223, 'epoch': 1} {'type': 'loss', 'content': 0.12788750231266022, 'timestamp': '2025-09-30 22:13:41.334310', 'step': 1224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.364919', 'step': 1224, 'epoch': 1} {'type': 'loss', 'content': 0.2545218765735626, 'timestamp': '2025-09-30 22:13:41.371330', 'step': 1225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.405517', 'step': 1225, 'epoch': 1} {'type': 'loss', 'content': 0.27373868227005005, 'timestamp': '2025-09-30 22:13:41.408116', 'step': 1226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:41.437740', 'step': 1226, 'epoch': 1} {'type': 'loss', 'content': 0.2400110810995102, 'timestamp': '2025-09-30 22:13:41.441012', 'step': 1227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:41.471518', 'step': 1227, 'epoch': 1} {'type': 'loss', 'content': 0.13291114568710327, 'timestamp': '2025-09-30 22:13:41.495731', 'step': 1228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.525532', 'step': 1228, 'epoch': 1} {'type': 'loss', 'content': 0.21680592000484467, 'timestamp': '2025-09-30 22:13:41.527976', 'step': 1229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:41.557763', 'step': 1229, 'epoch': 1} {'type': 'loss', 'content': 0.28993624448776245, 'timestamp': '2025-09-30 22:13:41.565028', 'step': 1230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:41.595290', 'step': 1230, 'epoch': 1} {'type': 'loss', 'content': 0.21565328538417816, 'timestamp': '2025-09-30 22:13:41.598456', 'step': 1231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:41.629214', 'step': 1231, 'epoch': 1} {'type': 'loss', 'content': 0.19558440148830414, 'timestamp': '2025-09-30 22:13:41.652873', 'step': 1232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.683468', 'step': 1232, 'epoch': 1} {'type': 'loss', 'content': 0.18775826692581177, 'timestamp': '2025-09-30 22:13:41.685898', 'step': 1233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:41.717816', 'step': 1233, 'epoch': 1} {'type': 'loss', 'content': 0.15276043117046356, 'timestamp': '2025-09-30 22:13:41.721808', 'step': 1234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.752481', 'step': 1234, 'epoch': 1} {'type': 'loss', 'content': 0.13310888409614563, 'timestamp': '2025-09-30 22:13:41.754988', 'step': 1235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.785327', 'step': 1235, 'epoch': 1} {'type': 'loss', 'content': 0.1323978751897812, 'timestamp': '2025-09-30 22:13:41.810796', 'step': 1236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:41.840763', 'step': 1236, 'epoch': 1} {'type': 'loss', 'content': 0.21151027083396912, 'timestamp': '2025-09-30 22:13:41.849570', 'step': 1237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-30 22:13:41.901224', 'step': 1237, 'epoch': 1} {'type': 'loss', 'content': 0.43498653173446655, 'timestamp': '2025-09-30 22:13:41.907391', 'step': 1238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:41.940017', 'step': 1238, 'epoch': 1} {'type': 'loss', 'content': 0.1481143981218338, 'timestamp': '2025-09-30 22:13:41.946443', 'step': 1239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:41.976513', 'step': 1239, 'epoch': 1} {'type': 'loss', 'content': 0.2441544383764267, 'timestamp': '2025-09-30 22:13:42.000343', 'step': 1240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:42.032787', 'step': 1240, 'epoch': 1} {'type': 'loss', 'content': 0.3002569377422333, 'timestamp': '2025-09-30 22:13:42.035243', 'step': 1241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:42.065952', 'step': 1241, 'epoch': 1} {'type': 'loss', 'content': 0.16613680124282837, 'timestamp': '2025-09-30 22:13:42.069028', 'step': 1242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:42.099843', 'step': 1242, 'epoch': 1} {'type': 'loss', 'content': 0.1677839159965515, 'timestamp': '2025-09-30 22:13:42.103223', 'step': 1243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.140447', 'step': 1243, 'epoch': 1} {'type': 'loss', 'content': 0.11132003366947174, 'timestamp': '2025-09-30 22:13:42.164503', 'step': 1244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.194321', 'step': 1244, 'epoch': 1} {'type': 'loss', 'content': 0.22670656442642212, 'timestamp': '2025-09-30 22:13:42.204531', 'step': 1245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:42.241430', 'step': 1245, 'epoch': 1} {'type': 'loss', 'content': 0.15783725678920746, 'timestamp': '2025-09-30 22:13:42.244425', 'step': 1246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:42.275934', 'step': 1246, 'epoch': 1} {'type': 'loss', 'content': 0.25072625279426575, 'timestamp': '2025-09-30 22:13:42.279069', 'step': 1247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.315275', 'step': 1247, 'epoch': 1} {'type': 'loss', 'content': 0.09868284314870834, 'timestamp': '2025-09-30 22:13:42.345894', 'step': 1248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.381068', 'step': 1248, 'epoch': 1} {'type': 'loss', 'content': 0.17731255292892456, 'timestamp': '2025-09-30 22:13:42.384710', 'step': 1249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.420841', 'step': 1249, 'epoch': 1} {'type': 'loss', 'content': 0.14086131751537323, 'timestamp': '2025-09-30 22:13:42.423753', 'step': 1250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:42.454097', 'step': 1250, 'epoch': 1} {'type': 'loss', 'content': 0.17437703907489777, 'timestamp': '2025-09-30 22:13:42.459128', 'step': 1251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:42.489688', 'step': 1251, 'epoch': 1} {'type': 'loss', 'content': 0.15029506385326385, 'timestamp': '2025-09-30 22:13:42.516777', 'step': 1252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.551297', 'step': 1252, 'epoch': 1} {'type': 'loss', 'content': 0.14391742646694183, 'timestamp': '2025-09-30 22:13:42.554339', 'step': 1253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.585568', 'step': 1253, 'epoch': 1} {'type': 'loss', 'content': 0.14118072390556335, 'timestamp': '2025-09-30 22:13:42.589453', 'step': 1254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.620513', 'step': 1254, 'epoch': 1} {'type': 'loss', 'content': 0.15960662066936493, 'timestamp': '2025-09-30 22:13:42.623732', 'step': 1255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:42.663652', 'step': 1255, 'epoch': 1} {'type': 'loss', 'content': 0.18916498124599457, 'timestamp': '2025-09-30 22:13:42.688489', 'step': 1256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.719773', 'step': 1256, 'epoch': 1} {'type': 'loss', 'content': 0.17227348685264587, 'timestamp': '2025-09-30 22:13:42.723370', 'step': 1257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.756634', 'step': 1257, 'epoch': 1} {'type': 'loss', 'content': 0.22066280245780945, 'timestamp': '2025-09-30 22:13:42.759830', 'step': 1258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.792056', 'step': 1258, 'epoch': 1} {'type': 'loss', 'content': 0.18918181955814362, 'timestamp': '2025-09-30 22:13:42.796742', 'step': 1259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.827571', 'step': 1259, 'epoch': 1} {'type': 'loss', 'content': 0.2583785653114319, 'timestamp': '2025-09-30 22:13:42.854595', 'step': 1260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:42.889688', 'step': 1260, 'epoch': 1} {'type': 'loss', 'content': 0.21281011402606964, 'timestamp': '2025-09-30 22:13:42.893007', 'step': 1261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:42.923562', 'step': 1261, 'epoch': 1} {'type': 'loss', 'content': 0.2494121938943863, 'timestamp': '2025-09-30 22:13:42.930052', 'step': 1262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.966781', 'step': 1262, 'epoch': 1} {'type': 'loss', 'content': 0.2003623992204666, 'timestamp': '2025-09-30 22:13:42.969477', 'step': 1263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:42.999639', 'step': 1263, 'epoch': 1} {'type': 'loss', 'content': 0.14455623924732208, 'timestamp': '2025-09-30 22:13:43.023763', 'step': 1264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.055017', 'step': 1264, 'epoch': 1} {'type': 'loss', 'content': 0.27800944447517395, 'timestamp': '2025-09-30 22:13:43.057997', 'step': 1265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.090728', 'step': 1265, 'epoch': 1} {'type': 'loss', 'content': 0.1773063987493515, 'timestamp': '2025-09-30 22:13:43.096106', 'step': 1266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.130089', 'step': 1266, 'epoch': 1} {'type': 'loss', 'content': 0.26120665669441223, 'timestamp': '2025-09-30 22:13:43.134491', 'step': 1267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.166670', 'step': 1267, 'epoch': 1} {'type': 'loss', 'content': 0.058677252382040024, 'timestamp': '2025-09-30 22:13:43.193033', 'step': 1268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.225019', 'step': 1268, 'epoch': 1} {'type': 'loss', 'content': 0.14811089634895325, 'timestamp': '2025-09-30 22:13:43.228291', 'step': 1269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:43.262366', 'step': 1269, 'epoch': 1} {'type': 'loss', 'content': 0.34474313259124756, 'timestamp': '2025-09-30 22:13:43.268529', 'step': 1270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.300914', 'step': 1270, 'epoch': 1} {'type': 'loss', 'content': 0.25195276737213135, 'timestamp': '2025-09-30 22:13:43.305562', 'step': 1271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:43.350050', 'step': 1271, 'epoch': 1} {'type': 'loss', 'content': 0.17865093052387238, 'timestamp': '2025-09-30 22:13:43.376654', 'step': 1272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.410944', 'step': 1272, 'epoch': 1} {'type': 'loss', 'content': 0.19221217930316925, 'timestamp': '2025-09-30 22:13:43.413642', 'step': 1273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.445206', 'step': 1273, 'epoch': 1} {'type': 'loss', 'content': 0.12593704462051392, 'timestamp': '2025-09-30 22:13:43.455150', 'step': 1274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.494077', 'step': 1274, 'epoch': 1} {'type': 'loss', 'content': 0.10183713585138321, 'timestamp': '2025-09-30 22:13:43.499734', 'step': 1275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.533387', 'step': 1275, 'epoch': 1} {'type': 'loss', 'content': 0.21217554807662964, 'timestamp': '2025-09-30 22:13:43.557548', 'step': 1276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.587795', 'step': 1276, 'epoch': 1} {'type': 'loss', 'content': 0.27198106050491333, 'timestamp': '2025-09-30 22:13:43.593945', 'step': 1277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.631804', 'step': 1277, 'epoch': 1} {'type': 'loss', 'content': 0.1714089810848236, 'timestamp': '2025-09-30 22:13:43.638104', 'step': 1278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:43.672493', 'step': 1278, 'epoch': 1} {'type': 'loss', 'content': 0.15697301924228668, 'timestamp': '2025-09-30 22:13:43.678436', 'step': 1279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:43.710656', 'step': 1279, 'epoch': 1} {'type': 'loss', 'content': 0.197781041264534, 'timestamp': '2025-09-30 22:13:43.734499', 'step': 1280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:43.770841', 'step': 1280, 'epoch': 1} {'type': 'loss', 'content': 0.16085168719291687, 'timestamp': '2025-09-30 22:13:43.778892', 'step': 1281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.818011', 'step': 1281, 'epoch': 1} {'type': 'loss', 'content': 0.13589800894260406, 'timestamp': '2025-09-30 22:13:43.822042', 'step': 1282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:43.852283', 'step': 1282, 'epoch': 1} {'type': 'loss', 'content': 0.18836073577404022, 'timestamp': '2025-09-30 22:13:43.860130', 'step': 1283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.895953', 'step': 1283, 'epoch': 1} {'type': 'loss', 'content': 0.15766626596450806, 'timestamp': '2025-09-30 22:13:43.919918', 'step': 1284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:43.953272', 'step': 1284, 'epoch': 1} {'type': 'loss', 'content': 0.18254365026950836, 'timestamp': '2025-09-30 22:13:43.956368', 'step': 1285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:43.989385', 'step': 1285, 'epoch': 1} {'type': 'loss', 'content': 0.22995850443840027, 'timestamp': '2025-09-30 22:13:43.994572', 'step': 1286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.026911', 'step': 1286, 'epoch': 1} {'type': 'loss', 'content': 0.26424139738082886, 'timestamp': '2025-09-30 22:13:44.031562', 'step': 1287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.063640', 'step': 1287, 'epoch': 1} {'type': 'loss', 'content': 0.13404683768749237, 'timestamp': '2025-09-30 22:13:44.087915', 'step': 1288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.120314', 'step': 1288, 'epoch': 1} {'type': 'loss', 'content': 0.14724205434322357, 'timestamp': '2025-09-30 22:13:44.125029', 'step': 1289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:44.154801', 'step': 1289, 'epoch': 1} {'type': 'loss', 'content': 0.1836402714252472, 'timestamp': '2025-09-30 22:13:44.159606', 'step': 1290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.193614', 'step': 1290, 'epoch': 1} {'type': 'loss', 'content': 0.21043998003005981, 'timestamp': '2025-09-30 22:13:44.199172', 'step': 1291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.232268', 'step': 1291, 'epoch': 1} {'type': 'loss', 'content': 0.1723576784133911, 'timestamp': '2025-09-30 22:13:44.256583', 'step': 1292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.287375', 'step': 1292, 'epoch': 1} {'type': 'loss', 'content': 0.14934109151363373, 'timestamp': '2025-09-30 22:13:44.291076', 'step': 1293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.322292', 'step': 1293, 'epoch': 1} {'type': 'loss', 'content': 0.1617506891489029, 'timestamp': '2025-09-30 22:13:44.328774', 'step': 1294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:44.363545', 'step': 1294, 'epoch': 1} {'type': 'loss', 'content': 0.21528920531272888, 'timestamp': '2025-09-30 22:13:44.366446', 'step': 1295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.397272', 'step': 1295, 'epoch': 1} {'type': 'loss', 'content': 0.23211485147476196, 'timestamp': '2025-09-30 22:13:44.422208', 'step': 1296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.452665', 'step': 1296, 'epoch': 1} {'type': 'loss', 'content': 0.12821407616138458, 'timestamp': '2025-09-30 22:13:44.461390', 'step': 1297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.492199', 'step': 1297, 'epoch': 1} {'type': 'loss', 'content': 0.1846700757741928, 'timestamp': '2025-09-30 22:13:44.496887', 'step': 1298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.530330', 'step': 1298, 'epoch': 1} {'type': 'loss', 'content': 0.17717312276363373, 'timestamp': '2025-09-30 22:13:44.534306', 'step': 1299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:44.573011', 'step': 1299, 'epoch': 1} {'type': 'loss', 'content': 0.16293635964393616, 'timestamp': '2025-09-30 22:13:44.598904', 'step': 1300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.630987', 'step': 1300, 'epoch': 1} {'type': 'loss', 'content': 0.2422308772802353, 'timestamp': '2025-09-30 22:13:44.640355', 'step': 1301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.672287', 'step': 1301, 'epoch': 1} {'type': 'loss', 'content': 0.19843678176403046, 'timestamp': '2025-09-30 22:13:44.682908', 'step': 1302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.720077', 'step': 1302, 'epoch': 1} {'type': 'loss', 'content': 0.2658361792564392, 'timestamp': '2025-09-30 22:13:44.722851', 'step': 1303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:44.757281', 'step': 1303, 'epoch': 1} {'type': 'loss', 'content': 0.18723970651626587, 'timestamp': '2025-09-30 22:13:44.781872', 'step': 1304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:44.819359', 'step': 1304, 'epoch': 1} {'type': 'loss', 'content': 0.16742612421512604, 'timestamp': '2025-09-30 22:13:44.821489', 'step': 1305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:44.858207', 'step': 1305, 'epoch': 1} {'type': 'loss', 'content': 0.3289197087287903, 'timestamp': '2025-09-30 22:13:44.862432', 'step': 1306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.894681', 'step': 1306, 'epoch': 1} {'type': 'loss', 'content': 0.18634235858917236, 'timestamp': '2025-09-30 22:13:44.900467', 'step': 1307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:44.933803', 'step': 1307, 'epoch': 1} {'type': 'loss', 'content': 0.23759399354457855, 'timestamp': '2025-09-30 22:13:44.962132', 'step': 1308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:44.992350', 'step': 1308, 'epoch': 1} {'type': 'loss', 'content': 0.1446249932050705, 'timestamp': '2025-09-30 22:13:44.997516', 'step': 1309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:45.030533', 'step': 1309, 'epoch': 1} {'type': 'loss', 'content': 0.10058934986591339, 'timestamp': '2025-09-30 22:13:45.032879', 'step': 1310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:45.065337', 'step': 1310, 'epoch': 1} {'type': 'loss', 'content': 0.14695221185684204, 'timestamp': '2025-09-30 22:13:45.070677', 'step': 1311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:45.105115', 'step': 1311, 'epoch': 1} {'type': 'loss', 'content': 0.14964573085308075, 'timestamp': '2025-09-30 22:13:45.129231', 'step': 1312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.159956', 'step': 1312, 'epoch': 1} {'type': 'loss', 'content': 0.19436956942081451, 'timestamp': '2025-09-30 22:13:45.162521', 'step': 1313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:45.193977', 'step': 1313, 'epoch': 1} {'type': 'loss', 'content': 0.16495925188064575, 'timestamp': '2025-09-30 22:13:45.197141', 'step': 1314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:45.229909', 'step': 1314, 'epoch': 1} {'type': 'loss', 'content': 0.19586580991744995, 'timestamp': '2025-09-30 22:13:45.232480', 'step': 1315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.262420', 'step': 1315, 'epoch': 1} {'type': 'loss', 'content': 0.22994253039360046, 'timestamp': '2025-09-30 22:13:45.292050', 'step': 1316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.326730', 'step': 1316, 'epoch': 1} {'type': 'loss', 'content': 0.1582787036895752, 'timestamp': '2025-09-30 22:13:45.330461', 'step': 1317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.362494', 'step': 1317, 'epoch': 1} {'type': 'loss', 'content': 0.13490812480449677, 'timestamp': '2025-09-30 22:13:45.365989', 'step': 1318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:45.398100', 'step': 1318, 'epoch': 1} {'type': 'loss', 'content': 0.16624963283538818, 'timestamp': '2025-09-30 22:13:45.400808', 'step': 1319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.431303', 'step': 1319, 'epoch': 1} {'type': 'loss', 'content': 0.07824689149856567, 'timestamp': '2025-09-30 22:13:45.455111', 'step': 1320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:45.485427', 'step': 1320, 'epoch': 1} {'type': 'loss', 'content': 0.19138260185718536, 'timestamp': '2025-09-30 22:13:45.488749', 'step': 1321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:13:45.521286', 'step': 1321, 'epoch': 1} {'type': 'loss', 'content': 0.11768519133329391, 'timestamp': '2025-09-30 22:13:45.526682', 'step': 1322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.557183', 'step': 1322, 'epoch': 1} {'type': 'loss', 'content': 0.1241602748632431, 'timestamp': '2025-09-30 22:13:45.559381', 'step': 1323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:45.589837', 'step': 1323, 'epoch': 1} {'type': 'loss', 'content': 0.13489998877048492, 'timestamp': '2025-09-30 22:13:45.615142', 'step': 1324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:45.650770', 'step': 1324, 'epoch': 1} {'type': 'loss', 'content': 0.2807198762893677, 'timestamp': '2025-09-30 22:13:45.654511', 'step': 1325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:45.684784', 'step': 1325, 'epoch': 1} {'type': 'loss', 'content': 0.10467332601547241, 'timestamp': '2025-09-30 22:13:45.689744', 'step': 1326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.727860', 'step': 1326, 'epoch': 1} {'type': 'loss', 'content': 0.1686045080423355, 'timestamp': '2025-09-30 22:13:45.730782', 'step': 1327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:45.762103', 'step': 1327, 'epoch': 1} {'type': 'loss', 'content': 0.1398056149482727, 'timestamp': '2025-09-30 22:13:45.787550', 'step': 1328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:45.818895', 'step': 1328, 'epoch': 1} {'type': 'loss', 'content': 0.13472643494606018, 'timestamp': '2025-09-30 22:13:45.822864', 'step': 1329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:45.855049', 'step': 1329, 'epoch': 1} {'type': 'loss', 'content': 0.11444007605314255, 'timestamp': '2025-09-30 22:13:45.858760', 'step': 1330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:45.889126', 'step': 1330, 'epoch': 1} {'type': 'loss', 'content': 0.14247946441173553, 'timestamp': '2025-09-30 22:13:45.892393', 'step': 1331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:45.921922', 'step': 1331, 'epoch': 1} {'type': 'loss', 'content': 0.2580181956291199, 'timestamp': '2025-09-30 22:13:45.946485', 'step': 1332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:45.976486', 'step': 1332, 'epoch': 1} {'type': 'loss', 'content': 0.19450318813323975, 'timestamp': '2025-09-30 22:13:45.979561', 'step': 1333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:46.017270', 'step': 1333, 'epoch': 1} {'type': 'loss', 'content': 0.26408323645591736, 'timestamp': '2025-09-30 22:13:46.023370', 'step': 1334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:46.053699', 'step': 1334, 'epoch': 1} {'type': 'loss', 'content': 0.19019345939159393, 'timestamp': '2025-09-30 22:13:46.059242', 'step': 1335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.093151', 'step': 1335, 'epoch': 1} {'type': 'loss', 'content': 0.16896486282348633, 'timestamp': '2025-09-30 22:13:46.119135', 'step': 1336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:46.152755', 'step': 1336, 'epoch': 1} {'type': 'loss', 'content': 0.10465061664581299, 'timestamp': '2025-09-30 22:13:46.156771', 'step': 1337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:46.186819', 'step': 1337, 'epoch': 1} {'type': 'loss', 'content': 0.10708863288164139, 'timestamp': '2025-09-30 22:13:46.194687', 'step': 1338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:46.252123', 'step': 1338, 'epoch': 1} {'type': 'loss', 'content': 0.1522536277770996, 'timestamp': '2025-09-30 22:13:46.254457', 'step': 1339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:46.286032', 'step': 1339, 'epoch': 1} {'type': 'loss', 'content': 0.25045451521873474, 'timestamp': '2025-09-30 22:13:46.310717', 'step': 1340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.346744', 'step': 1340, 'epoch': 1} {'type': 'loss', 'content': 0.19809621572494507, 'timestamp': '2025-09-30 22:13:46.349512', 'step': 1341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.383000', 'step': 1341, 'epoch': 1} {'type': 'loss', 'content': 0.21329814195632935, 'timestamp': '2025-09-30 22:13:46.385174', 'step': 1342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:46.417465', 'step': 1342, 'epoch': 1} {'type': 'loss', 'content': 0.13290777802467346, 'timestamp': '2025-09-30 22:13:46.419720', 'step': 1343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:46.452855', 'step': 1343, 'epoch': 1} {'type': 'loss', 'content': 0.1217518299818039, 'timestamp': '2025-09-30 22:13:46.481297', 'step': 1344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:46.516055', 'step': 1344, 'epoch': 1} {'type': 'loss', 'content': 0.3138441741466522, 'timestamp': '2025-09-30 22:13:46.519954', 'step': 1345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.551108', 'step': 1345, 'epoch': 1} {'type': 'loss', 'content': 0.1928676962852478, 'timestamp': '2025-09-30 22:13:46.558036', 'step': 1346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:46.588582', 'step': 1346, 'epoch': 1} {'type': 'loss', 'content': 0.11553669720888138, 'timestamp': '2025-09-30 22:13:46.590579', 'step': 1347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.622803', 'step': 1347, 'epoch': 1} {'type': 'loss', 'content': 0.23235172033309937, 'timestamp': '2025-09-30 22:13:46.646703', 'step': 1348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.679300', 'step': 1348, 'epoch': 1} {'type': 'loss', 'content': 0.21208646893501282, 'timestamp': '2025-09-30 22:13:46.683801', 'step': 1349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:46.716602', 'step': 1349, 'epoch': 1} {'type': 'loss', 'content': 0.1887199729681015, 'timestamp': '2025-09-30 22:13:46.718996', 'step': 1350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.751023', 'step': 1350, 'epoch': 1} {'type': 'loss', 'content': 0.1415257453918457, 'timestamp': '2025-09-30 22:13:46.754430', 'step': 1351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:46.786986', 'step': 1351, 'epoch': 1} {'type': 'loss', 'content': 0.153824120759964, 'timestamp': '2025-09-30 22:13:46.811163', 'step': 1352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.841623', 'step': 1352, 'epoch': 1} {'type': 'loss', 'content': 0.2195548266172409, 'timestamp': '2025-09-30 22:13:46.846696', 'step': 1353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.882333', 'step': 1353, 'epoch': 1} {'type': 'loss', 'content': 0.1474395990371704, 'timestamp': '2025-09-30 22:13:46.889120', 'step': 1354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:46.926699', 'step': 1354, 'epoch': 1} {'type': 'loss', 'content': 0.33409321308135986, 'timestamp': '2025-09-30 22:13:46.929130', 'step': 1355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:46.959374', 'step': 1355, 'epoch': 1} {'type': 'loss', 'content': 0.13828375935554504, 'timestamp': '2025-09-30 22:13:46.983348', 'step': 1356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:47.014191', 'step': 1356, 'epoch': 1} {'type': 'loss', 'content': 0.1986316740512848, 'timestamp': '2025-09-30 22:13:47.017694', 'step': 1357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.053594', 'step': 1357, 'epoch': 1} {'type': 'loss', 'content': 0.28767603635787964, 'timestamp': '2025-09-30 22:13:47.056454', 'step': 1358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:47.088632', 'step': 1358, 'epoch': 1} {'type': 'loss', 'content': 0.24464285373687744, 'timestamp': '2025-09-30 22:13:47.092681', 'step': 1359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:47.123915', 'step': 1359, 'epoch': 1} {'type': 'loss', 'content': 0.19419480860233307, 'timestamp': '2025-09-30 22:13:47.148566', 'step': 1360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.181602', 'step': 1360, 'epoch': 1} {'type': 'loss', 'content': 0.20523080229759216, 'timestamp': '2025-09-30 22:13:47.183817', 'step': 1361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:47.216059', 'step': 1361, 'epoch': 1} {'type': 'loss', 'content': 0.09270875155925751, 'timestamp': '2025-09-30 22:13:47.221549', 'step': 1362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:47.252486', 'step': 1362, 'epoch': 1} {'type': 'loss', 'content': 0.11821252852678299, 'timestamp': '2025-09-30 22:13:47.256249', 'step': 1363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.288276', 'step': 1363, 'epoch': 1} {'type': 'loss', 'content': 0.22952447831630707, 'timestamp': '2025-09-30 22:13:47.313969', 'step': 1364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:47.345886', 'step': 1364, 'epoch': 1} {'type': 'loss', 'content': 0.09787154197692871, 'timestamp': '2025-09-30 22:13:47.351567', 'step': 1365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:47.384746', 'step': 1365, 'epoch': 1} {'type': 'loss', 'content': 0.1309771090745926, 'timestamp': '2025-09-30 22:13:47.387510', 'step': 1366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.418772', 'step': 1366, 'epoch': 1} {'type': 'loss', 'content': 0.18716448545455933, 'timestamp': '2025-09-30 22:13:47.422868', 'step': 1367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:47.454181', 'step': 1367, 'epoch': 1} {'type': 'loss', 'content': 0.08896107226610184, 'timestamp': '2025-09-30 22:13:47.481184', 'step': 1368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.512491', 'step': 1368, 'epoch': 1} {'type': 'loss', 'content': 0.18996861577033997, 'timestamp': '2025-09-30 22:13:47.514847', 'step': 1369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:47.545606', 'step': 1369, 'epoch': 1} {'type': 'loss', 'content': 0.170039102435112, 'timestamp': '2025-09-30 22:13:47.548285', 'step': 1370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:47.579620', 'step': 1370, 'epoch': 1} {'type': 'loss', 'content': 0.20061293244361877, 'timestamp': '2025-09-30 22:13:47.582847', 'step': 1371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:47.613756', 'step': 1371, 'epoch': 1} {'type': 'loss', 'content': 0.13242749869823456, 'timestamp': '2025-09-30 22:13:47.639974', 'step': 1372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.672692', 'step': 1372, 'epoch': 1} {'type': 'loss', 'content': 0.08180296421051025, 'timestamp': '2025-09-30 22:13:47.675472', 'step': 1373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:47.708530', 'step': 1373, 'epoch': 1} {'type': 'loss', 'content': 0.21371741592884064, 'timestamp': '2025-09-30 22:13:47.713377', 'step': 1374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:47.745155', 'step': 1374, 'epoch': 1} {'type': 'loss', 'content': 0.15324003994464874, 'timestamp': '2025-09-30 22:13:47.748938', 'step': 1375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.781698', 'step': 1375, 'epoch': 1} {'type': 'loss', 'content': 0.23577739298343658, 'timestamp': '2025-09-30 22:13:47.808035', 'step': 1376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:47.838854', 'step': 1376, 'epoch': 1} {'type': 'loss', 'content': 0.1662328541278839, 'timestamp': '2025-09-30 22:13:47.840740', 'step': 1377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.874188', 'step': 1377, 'epoch': 1} {'type': 'loss', 'content': 0.21029634773731232, 'timestamp': '2025-09-30 22:13:47.876503', 'step': 1378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:47.910117', 'step': 1378, 'epoch': 1} {'type': 'loss', 'content': 0.14170819520950317, 'timestamp': '2025-09-30 22:13:47.912874', 'step': 1379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:47.943085', 'step': 1379, 'epoch': 1} {'type': 'loss', 'content': 0.1606454998254776, 'timestamp': '2025-09-30 22:13:47.966958', 'step': 1380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:47.998112', 'step': 1380, 'epoch': 1} {'type': 'loss', 'content': 0.16500429809093475, 'timestamp': '2025-09-30 22:13:48.000493', 'step': 1381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:48.032518', 'step': 1381, 'epoch': 1} {'type': 'loss', 'content': 0.13271339237689972, 'timestamp': '2025-09-30 22:13:48.035369', 'step': 1382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.065322', 'step': 1382, 'epoch': 1} {'type': 'loss', 'content': 0.2363748848438263, 'timestamp': '2025-09-30 22:13:48.070740', 'step': 1383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.100976', 'step': 1383, 'epoch': 1} {'type': 'loss', 'content': 0.15010544657707214, 'timestamp': '2025-09-30 22:13:48.125828', 'step': 1384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:48.160985', 'step': 1384, 'epoch': 1} {'type': 'loss', 'content': 0.21919719874858856, 'timestamp': '2025-09-30 22:13:48.164582', 'step': 1385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:48.195934', 'step': 1385, 'epoch': 1} {'type': 'loss', 'content': 0.18868428468704224, 'timestamp': '2025-09-30 22:13:48.199247', 'step': 1386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:48.229098', 'step': 1386, 'epoch': 1} {'type': 'loss', 'content': 0.16120631992816925, 'timestamp': '2025-09-30 22:13:48.231538', 'step': 1387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:48.260913', 'step': 1387, 'epoch': 1} {'type': 'loss', 'content': 0.18819983303546906, 'timestamp': '2025-09-30 22:13:48.285141', 'step': 1388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.315599', 'step': 1388, 'epoch': 1} {'type': 'loss', 'content': 0.19977949559688568, 'timestamp': '2025-09-30 22:13:48.318329', 'step': 1389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.348261', 'step': 1389, 'epoch': 1} {'type': 'loss', 'content': 0.161846324801445, 'timestamp': '2025-09-30 22:13:48.353016', 'step': 1390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:48.382878', 'step': 1390, 'epoch': 1} {'type': 'loss', 'content': 0.20111417770385742, 'timestamp': '2025-09-30 22:13:48.388227', 'step': 1391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:48.419230', 'step': 1391, 'epoch': 1} {'type': 'loss', 'content': 0.07688216120004654, 'timestamp': '2025-09-30 22:13:48.443067', 'step': 1392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.475430', 'step': 1392, 'epoch': 1} {'type': 'loss', 'content': 0.21732430160045624, 'timestamp': '2025-09-30 22:13:48.477626', 'step': 1393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.507645', 'step': 1393, 'epoch': 1} {'type': 'loss', 'content': 0.16784466803073883, 'timestamp': '2025-09-30 22:13:48.510007', 'step': 1394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:48.543045', 'step': 1394, 'epoch': 1} {'type': 'loss', 'content': 0.14006127417087555, 'timestamp': '2025-09-30 22:13:48.545990', 'step': 1395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:48.581024', 'step': 1395, 'epoch': 1} {'type': 'loss', 'content': 0.25079837441444397, 'timestamp': '2025-09-30 22:13:48.609152', 'step': 1396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:48.640926', 'step': 1396, 'epoch': 1} {'type': 'loss', 'content': 0.18793179094791412, 'timestamp': '2025-09-30 22:13:48.643248', 'step': 1397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:48.682149', 'step': 1397, 'epoch': 1} {'type': 'loss', 'content': 0.18945005536079407, 'timestamp': '2025-09-30 22:13:48.692496', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:13:56.185415', 'step': 1398, 'epoch': 1} {'type': 'pplx', 'content': 8088.502415666124, 'timestamp': '2025-09-30 22:13:56.190250', 'step': 1398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.227961', 'step': 1398, 'epoch': 1} {'type': 'loss', 'content': 0.2370978146791458, 'timestamp': '2025-09-30 22:13:56.230718', 'step': 1399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.260695', 'step': 1399, 'epoch': 1} {'type': 'loss', 'content': 0.14842106401920319, 'timestamp': '2025-09-30 22:13:56.285047', 'step': 1400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:56.318028', 'step': 1400, 'epoch': 1} {'type': 'loss', 'content': 0.21746458113193512, 'timestamp': '2025-09-30 22:13:56.320485', 'step': 1401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.353059', 'step': 1401, 'epoch': 1} {'type': 'loss', 'content': 0.2013019621372223, 'timestamp': '2025-09-30 22:13:56.355883', 'step': 1402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:56.385898', 'step': 1402, 'epoch': 1} {'type': 'loss', 'content': 0.16695798933506012, 'timestamp': '2025-09-30 22:13:56.388371', 'step': 1403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.421512', 'step': 1403, 'epoch': 1} {'type': 'loss', 'content': 0.1689431369304657, 'timestamp': '2025-09-30 22:13:56.445251', 'step': 1404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.474599', 'step': 1404, 'epoch': 1} {'type': 'loss', 'content': 0.17749078571796417, 'timestamp': '2025-09-30 22:13:56.477747', 'step': 1405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:56.512866', 'step': 1405, 'epoch': 1} {'type': 'loss', 'content': 0.14764289557933807, 'timestamp': '2025-09-30 22:13:56.516079', 'step': 1406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:56.546304', 'step': 1406, 'epoch': 1} {'type': 'loss', 'content': 0.1415271908044815, 'timestamp': '2025-09-30 22:13:56.549832', 'step': 1407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.581865', 'step': 1407, 'epoch': 1} {'type': 'loss', 'content': 0.2538968324661255, 'timestamp': '2025-09-30 22:13:56.605900', 'step': 1408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.635805', 'step': 1408, 'epoch': 1} {'type': 'loss', 'content': 0.15882907807826996, 'timestamp': '2025-09-30 22:13:56.638872', 'step': 1409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.669676', 'step': 1409, 'epoch': 1} {'type': 'loss', 'content': 0.16995948553085327, 'timestamp': '2025-09-30 22:13:56.671721', 'step': 1410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.701625', 'step': 1410, 'epoch': 1} {'type': 'loss', 'content': 0.17247755825519562, 'timestamp': '2025-09-30 22:13:56.704845', 'step': 1411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.735300', 'step': 1411, 'epoch': 1} {'type': 'loss', 'content': 0.1203310489654541, 'timestamp': '2025-09-30 22:13:56.759164', 'step': 1412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.792913', 'step': 1412, 'epoch': 1} {'type': 'loss', 'content': 0.22186338901519775, 'timestamp': '2025-09-30 22:13:56.798226', 'step': 1413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:56.832046', 'step': 1413, 'epoch': 1} {'type': 'loss', 'content': 0.1964615285396576, 'timestamp': '2025-09-30 22:13:56.837233', 'step': 1414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.871634', 'step': 1414, 'epoch': 1} {'type': 'loss', 'content': 0.15317343175411224, 'timestamp': '2025-09-30 22:13:56.874585', 'step': 1415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:56.904768', 'step': 1415, 'epoch': 1} {'type': 'loss', 'content': 0.10206644237041473, 'timestamp': '2025-09-30 22:13:56.928929', 'step': 1416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:56.959049', 'step': 1416, 'epoch': 1} {'type': 'loss', 'content': 0.30112171173095703, 'timestamp': '2025-09-30 22:13:56.961452', 'step': 1417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:56.992497', 'step': 1417, 'epoch': 1} {'type': 'loss', 'content': 0.18725734949111938, 'timestamp': '2025-09-30 22:13:56.995390', 'step': 1418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:57.025927', 'step': 1418, 'epoch': 1} {'type': 'loss', 'content': 0.21891939640045166, 'timestamp': '2025-09-30 22:13:57.028662', 'step': 1419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:57.058765', 'step': 1419, 'epoch': 1} {'type': 'loss', 'content': 0.28107836842536926, 'timestamp': '2025-09-30 22:13:57.083404', 'step': 1420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:57.112953', 'step': 1420, 'epoch': 1} {'type': 'loss', 'content': 0.15704920887947083, 'timestamp': '2025-09-30 22:13:57.117985', 'step': 1421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:57.149391', 'step': 1421, 'epoch': 1} {'type': 'loss', 'content': 0.17408984899520874, 'timestamp': '2025-09-30 22:13:57.162018', 'step': 1422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.194146', 'step': 1422, 'epoch': 1} {'type': 'loss', 'content': 0.13606643676757812, 'timestamp': '2025-09-30 22:13:57.196743', 'step': 1423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.232236', 'step': 1423, 'epoch': 1} {'type': 'loss', 'content': 0.20626004040241241, 'timestamp': '2025-09-30 22:13:57.256171', 'step': 1424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.285939', 'step': 1424, 'epoch': 1} {'type': 'loss', 'content': 0.16404391825199127, 'timestamp': '2025-09-30 22:13:57.288506', 'step': 1425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:57.320109', 'step': 1425, 'epoch': 1} {'type': 'loss', 'content': 0.25186625123023987, 'timestamp': '2025-09-30 22:13:57.322542', 'step': 1426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:57.352616', 'step': 1426, 'epoch': 1} {'type': 'loss', 'content': 0.2034032791852951, 'timestamp': '2025-09-30 22:13:57.357039', 'step': 1427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.387369', 'step': 1427, 'epoch': 1} {'type': 'loss', 'content': 0.16754665970802307, 'timestamp': '2025-09-30 22:13:57.411361', 'step': 1428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:57.440837', 'step': 1428, 'epoch': 1} {'type': 'loss', 'content': 0.1515641063451767, 'timestamp': '2025-09-30 22:13:57.443996', 'step': 1429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:57.473640', 'step': 1429, 'epoch': 1} {'type': 'loss', 'content': 0.19970113039016724, 'timestamp': '2025-09-30 22:13:57.476262', 'step': 1430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:57.508062', 'step': 1430, 'epoch': 1} {'type': 'loss', 'content': 0.2580178380012512, 'timestamp': '2025-09-30 22:13:57.511988', 'step': 1431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:13:57.541950', 'step': 1431, 'epoch': 1} {'type': 'loss', 'content': 0.22331416606903076, 'timestamp': '2025-09-30 22:13:57.565875', 'step': 1432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.596078', 'step': 1432, 'epoch': 1} {'type': 'loss', 'content': 0.14041531085968018, 'timestamp': '2025-09-30 22:13:57.600211', 'step': 1433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:13:57.631006', 'step': 1433, 'epoch': 1} {'type': 'loss', 'content': 0.28750431537628174, 'timestamp': '2025-09-30 22:13:57.633582', 'step': 1434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:57.663599', 'step': 1434, 'epoch': 1} {'type': 'loss', 'content': 0.12213891744613647, 'timestamp': '2025-09-30 22:13:57.667486', 'step': 1435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:57.698031', 'step': 1435, 'epoch': 1} {'type': 'loss', 'content': 0.13724453747272491, 'timestamp': '2025-09-30 22:13:57.722266', 'step': 1436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.752998', 'step': 1436, 'epoch': 1} {'type': 'loss', 'content': 0.20569396018981934, 'timestamp': '2025-09-30 22:13:57.757997', 'step': 1437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.790667', 'step': 1437, 'epoch': 1} {'type': 'loss', 'content': 0.15594623982906342, 'timestamp': '2025-09-30 22:13:57.796235', 'step': 1438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:57.829461', 'step': 1438, 'epoch': 1} {'type': 'loss', 'content': 0.19790303707122803, 'timestamp': '2025-09-30 22:13:57.834718', 'step': 1439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:57.869420', 'step': 1439, 'epoch': 1} {'type': 'loss', 'content': 0.1933596134185791, 'timestamp': '2025-09-30 22:13:57.897315', 'step': 1440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:57.930461', 'step': 1440, 'epoch': 1} {'type': 'loss', 'content': 0.14120197296142578, 'timestamp': '2025-09-30 22:13:57.933436', 'step': 1441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:57.966659', 'step': 1441, 'epoch': 1} {'type': 'loss', 'content': 0.2450890690088272, 'timestamp': '2025-09-30 22:13:57.974794', 'step': 1442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.005463', 'step': 1442, 'epoch': 1} {'type': 'loss', 'content': 0.1858469396829605, 'timestamp': '2025-09-30 22:13:58.009482', 'step': 1443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.039500', 'step': 1443, 'epoch': 1} {'type': 'loss', 'content': 0.2639820873737335, 'timestamp': '2025-09-30 22:13:58.063984', 'step': 1444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.095056', 'step': 1444, 'epoch': 1} {'type': 'loss', 'content': 0.34445542097091675, 'timestamp': '2025-09-30 22:13:58.097517', 'step': 1445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:58.127188', 'step': 1445, 'epoch': 1} {'type': 'loss', 'content': 0.13745979964733124, 'timestamp': '2025-09-30 22:13:58.129423', 'step': 1446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:58.161796', 'step': 1446, 'epoch': 1} {'type': 'loss', 'content': 0.15465669333934784, 'timestamp': '2025-09-30 22:13:58.164127', 'step': 1447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.193754', 'step': 1447, 'epoch': 1} {'type': 'loss', 'content': 0.25548017024993896, 'timestamp': '2025-09-30 22:13:58.219001', 'step': 1448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.250074', 'step': 1448, 'epoch': 1} {'type': 'loss', 'content': 0.16105717420578003, 'timestamp': '2025-09-30 22:13:58.252280', 'step': 1449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:58.282113', 'step': 1449, 'epoch': 1} {'type': 'loss', 'content': 0.220139741897583, 'timestamp': '2025-09-30 22:13:58.284494', 'step': 1450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.315660', 'step': 1450, 'epoch': 1} {'type': 'loss', 'content': 0.26425641775131226, 'timestamp': '2025-09-30 22:13:58.317856', 'step': 1451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.347823', 'step': 1451, 'epoch': 1} {'type': 'loss', 'content': 0.1701454520225525, 'timestamp': '2025-09-30 22:13:58.371722', 'step': 1452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.404403', 'step': 1452, 'epoch': 1} {'type': 'loss', 'content': 0.1301892101764679, 'timestamp': '2025-09-30 22:13:58.407611', 'step': 1453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.440343', 'step': 1453, 'epoch': 1} {'type': 'loss', 'content': 0.11225786060094833, 'timestamp': '2025-09-30 22:13:58.444467', 'step': 1454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.475802', 'step': 1454, 'epoch': 1} {'type': 'loss', 'content': 0.2738456726074219, 'timestamp': '2025-09-30 22:13:58.479858', 'step': 1455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.511228', 'step': 1455, 'epoch': 1} {'type': 'loss', 'content': 0.0979180857539177, 'timestamp': '2025-09-30 22:13:58.534862', 'step': 1456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.565022', 'step': 1456, 'epoch': 1} {'type': 'loss', 'content': 0.1751263290643692, 'timestamp': '2025-09-30 22:13:58.569112', 'step': 1457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.600339', 'step': 1457, 'epoch': 1} {'type': 'loss', 'content': 0.14477886259555817, 'timestamp': '2025-09-30 22:13:58.602585', 'step': 1458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.632265', 'step': 1458, 'epoch': 1} {'type': 'loss', 'content': 0.1949114352464676, 'timestamp': '2025-09-30 22:13:58.635027', 'step': 1459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.665440', 'step': 1459, 'epoch': 1} {'type': 'loss', 'content': 0.12604203820228577, 'timestamp': '2025-09-30 22:13:58.689979', 'step': 1460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:58.719677', 'step': 1460, 'epoch': 1} {'type': 'loss', 'content': 0.18787968158721924, 'timestamp': '2025-09-30 22:13:58.722504', 'step': 1461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.752267', 'step': 1461, 'epoch': 1} {'type': 'loss', 'content': 0.1903696060180664, 'timestamp': '2025-09-30 22:13:58.754499', 'step': 1462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.786578', 'step': 1462, 'epoch': 1} {'type': 'loss', 'content': 0.27979832887649536, 'timestamp': '2025-09-30 22:13:58.789390', 'step': 1463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.819601', 'step': 1463, 'epoch': 1} {'type': 'loss', 'content': 0.18652261793613434, 'timestamp': '2025-09-30 22:13:58.843280', 'step': 1464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:58.875341', 'step': 1464, 'epoch': 1} {'type': 'loss', 'content': 0.20063461363315582, 'timestamp': '2025-09-30 22:13:58.879824', 'step': 1465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.911390', 'step': 1465, 'epoch': 1} {'type': 'loss', 'content': 0.1994556039571762, 'timestamp': '2025-09-30 22:13:58.916120', 'step': 1466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:58.947795', 'step': 1466, 'epoch': 1} {'type': 'loss', 'content': 0.15524576604366302, 'timestamp': '2025-09-30 22:13:58.951719', 'step': 1467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:58.985091', 'step': 1467, 'epoch': 1} {'type': 'loss', 'content': 0.24017629027366638, 'timestamp': '2025-09-30 22:13:59.009766', 'step': 1468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:59.040416', 'step': 1468, 'epoch': 1} {'type': 'loss', 'content': 0.31977155804634094, 'timestamp': '2025-09-30 22:13:59.042808', 'step': 1469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:59.073799', 'step': 1469, 'epoch': 1} {'type': 'loss', 'content': 0.24426090717315674, 'timestamp': '2025-09-30 22:13:59.077315', 'step': 1470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.113900', 'step': 1470, 'epoch': 1} {'type': 'loss', 'content': 0.15714076161384583, 'timestamp': '2025-09-30 22:13:59.116342', 'step': 1471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.147898', 'step': 1471, 'epoch': 1} {'type': 'loss', 'content': 0.1340194195508957, 'timestamp': '2025-09-30 22:13:59.171609', 'step': 1472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:59.201355', 'step': 1472, 'epoch': 1} {'type': 'loss', 'content': 0.12433823943138123, 'timestamp': '2025-09-30 22:13:59.204294', 'step': 1473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.241944', 'step': 1473, 'epoch': 1} {'type': 'loss', 'content': 0.2010585069656372, 'timestamp': '2025-09-30 22:13:59.244166', 'step': 1474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.275475', 'step': 1474, 'epoch': 1} {'type': 'loss', 'content': 0.14758875966072083, 'timestamp': '2025-09-30 22:13:59.278430', 'step': 1475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:59.309524', 'step': 1475, 'epoch': 1} {'type': 'loss', 'content': 0.14786696434020996, 'timestamp': '2025-09-30 22:13:59.334259', 'step': 1476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.365714', 'step': 1476, 'epoch': 1} {'type': 'loss', 'content': 0.22213253378868103, 'timestamp': '2025-09-30 22:13:59.368827', 'step': 1477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.400211', 'step': 1477, 'epoch': 1} {'type': 'loss', 'content': 0.07579405605792999, 'timestamp': '2025-09-30 22:13:59.403533', 'step': 1478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:13:59.434472', 'step': 1478, 'epoch': 1} {'type': 'loss', 'content': 0.2313372790813446, 'timestamp': '2025-09-30 22:13:59.438653', 'step': 1479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.469822', 'step': 1479, 'epoch': 1} {'type': 'loss', 'content': 0.18850965797901154, 'timestamp': '2025-09-30 22:13:59.501947', 'step': 1480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.535315', 'step': 1480, 'epoch': 1} {'type': 'loss', 'content': 0.1161552369594574, 'timestamp': '2025-09-30 22:13:59.539468', 'step': 1481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.571124', 'step': 1481, 'epoch': 1} {'type': 'loss', 'content': 0.19581305980682373, 'timestamp': '2025-09-30 22:13:59.574060', 'step': 1482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.606509', 'step': 1482, 'epoch': 1} {'type': 'loss', 'content': 0.16450455784797668, 'timestamp': '2025-09-30 22:13:59.609372', 'step': 1483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.641110', 'step': 1483, 'epoch': 1} {'type': 'loss', 'content': 0.12797817587852478, 'timestamp': '2025-09-30 22:13:59.666335', 'step': 1484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.698413', 'step': 1484, 'epoch': 1} {'type': 'loss', 'content': 0.26001212000846863, 'timestamp': '2025-09-30 22:13:59.706811', 'step': 1485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:13:59.739763', 'step': 1485, 'epoch': 1} {'type': 'loss', 'content': 0.1947653591632843, 'timestamp': '2025-09-30 22:13:59.742817', 'step': 1486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.774123', 'step': 1486, 'epoch': 1} {'type': 'loss', 'content': 0.12986065447330475, 'timestamp': '2025-09-30 22:13:59.776828', 'step': 1487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.812318', 'step': 1487, 'epoch': 1} {'type': 'loss', 'content': 0.16620352864265442, 'timestamp': '2025-09-30 22:13:59.836218', 'step': 1488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:13:59.881449', 'step': 1488, 'epoch': 1} {'type': 'loss', 'content': 0.20285604894161224, 'timestamp': '2025-09-30 22:13:59.887432', 'step': 1489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.919388', 'step': 1489, 'epoch': 1} {'type': 'loss', 'content': 0.21024495363235474, 'timestamp': '2025-09-30 22:13:59.923397', 'step': 1490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:13:59.955990', 'step': 1490, 'epoch': 1} {'type': 'loss', 'content': 0.16167022287845612, 'timestamp': '2025-09-30 22:13:59.958124', 'step': 1491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:00.004670', 'step': 1491, 'epoch': 1} {'type': 'loss', 'content': 0.18437431752681732, 'timestamp': '2025-09-30 22:14:00.030150', 'step': 1492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:00.060337', 'step': 1492, 'epoch': 1} {'type': 'loss', 'content': 0.26143595576286316, 'timestamp': '2025-09-30 22:14:00.062356', 'step': 1493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:00.097936', 'step': 1493, 'epoch': 1} {'type': 'loss', 'content': 0.15555180609226227, 'timestamp': '2025-09-30 22:14:00.101645', 'step': 1494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:00.138286', 'step': 1494, 'epoch': 1} {'type': 'loss', 'content': 0.19738824665546417, 'timestamp': '2025-09-30 22:14:00.140897', 'step': 1495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:00.176665', 'step': 1495, 'epoch': 1} {'type': 'loss', 'content': 0.331387996673584, 'timestamp': '2025-09-30 22:14:00.200311', 'step': 1496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:00.230962', 'step': 1496, 'epoch': 1} {'type': 'loss', 'content': 0.1793590486049652, 'timestamp': '2025-09-30 22:14:00.235863', 'step': 1497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:00.267558', 'step': 1497, 'epoch': 1} {'type': 'loss', 'content': 0.13445086777210236, 'timestamp': '2025-09-30 22:14:00.271083', 'step': 1498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:00.308698', 'step': 1498, 'epoch': 1} {'type': 'loss', 'content': 0.11663525551557541, 'timestamp': '2025-09-30 22:14:00.315862', 'step': 1499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:00.349421', 'step': 1499, 'epoch': 1} {'type': 'loss', 'content': 0.1940567046403885, 'timestamp': '2025-09-30 22:14:00.377099', 'step': 1500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 1500', 'timestamp': '2025-09-30 22:14:04.897254', 'step': 1500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:04.929879', 'step': 1500, 'epoch': 1} {'type': 'loss', 'content': 0.20854777097702026, 'timestamp': '2025-09-30 22:14:04.933626', 'step': 1501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:04.967014', 'step': 1501, 'epoch': 1} {'type': 'loss', 'content': 0.2968514859676361, 'timestamp': '2025-09-30 22:14:04.970876', 'step': 1502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:05.002435', 'step': 1502, 'epoch': 1} {'type': 'loss', 'content': 0.1632671356201172, 'timestamp': '2025-09-30 22:14:05.004924', 'step': 1503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.035109', 'step': 1503, 'epoch': 1} {'type': 'loss', 'content': 0.11838462203741074, 'timestamp': '2025-09-30 22:14:05.059343', 'step': 1504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.089314', 'step': 1504, 'epoch': 1} {'type': 'loss', 'content': 0.17967022955417633, 'timestamp': '2025-09-30 22:14:05.092330', 'step': 1505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:05.122337', 'step': 1505, 'epoch': 1} {'type': 'loss', 'content': 0.1848667860031128, 'timestamp': '2025-09-30 22:14:05.124479', 'step': 1506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.156751', 'step': 1506, 'epoch': 1} {'type': 'loss', 'content': 0.19970867037773132, 'timestamp': '2025-09-30 22:14:05.162779', 'step': 1507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:05.194277', 'step': 1507, 'epoch': 1} {'type': 'loss', 'content': 0.16940180957317352, 'timestamp': '2025-09-30 22:14:05.217994', 'step': 1508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:05.247952', 'step': 1508, 'epoch': 1} {'type': 'loss', 'content': 0.1607845276594162, 'timestamp': '2025-09-30 22:14:05.255551', 'step': 1509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:05.287320', 'step': 1509, 'epoch': 1} {'type': 'loss', 'content': 0.20119459927082062, 'timestamp': '2025-09-30 22:14:05.291022', 'step': 1510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:14:05.322461', 'step': 1510, 'epoch': 1} {'type': 'loss', 'content': 0.15069393813610077, 'timestamp': '2025-09-30 22:14:05.326565', 'step': 1511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.358378', 'step': 1511, 'epoch': 1} {'type': 'loss', 'content': 0.1741325408220291, 'timestamp': '2025-09-30 22:14:05.382479', 'step': 1512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.414596', 'step': 1512, 'epoch': 1} {'type': 'loss', 'content': 0.15798114240169525, 'timestamp': '2025-09-30 22:14:05.417111', 'step': 1513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.447162', 'step': 1513, 'epoch': 1} {'type': 'loss', 'content': 0.3689381182193756, 'timestamp': '2025-09-30 22:14:05.450119', 'step': 1514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:05.480829', 'step': 1514, 'epoch': 1} {'type': 'loss', 'content': 0.2471754550933838, 'timestamp': '2025-09-30 22:14:05.484142', 'step': 1515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.524185', 'step': 1515, 'epoch': 1} {'type': 'loss', 'content': 0.18417347967624664, 'timestamp': '2025-09-30 22:14:05.552642', 'step': 1516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.582842', 'step': 1516, 'epoch': 1} {'type': 'loss', 'content': 0.15401533246040344, 'timestamp': '2025-09-30 22:14:05.589483', 'step': 1517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.624802', 'step': 1517, 'epoch': 1} {'type': 'loss', 'content': 0.21217922866344452, 'timestamp': '2025-09-30 22:14:05.627106', 'step': 1518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:05.658062', 'step': 1518, 'epoch': 1} {'type': 'loss', 'content': 0.33876124024391174, 'timestamp': '2025-09-30 22:14:05.660685', 'step': 1519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.691172', 'step': 1519, 'epoch': 1} {'type': 'loss', 'content': 0.0983218103647232, 'timestamp': '2025-09-30 22:14:05.715602', 'step': 1520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.746504', 'step': 1520, 'epoch': 1} {'type': 'loss', 'content': 0.05811913311481476, 'timestamp': '2025-09-30 22:14:05.748714', 'step': 1521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:05.781419', 'step': 1521, 'epoch': 1} {'type': 'loss', 'content': 0.18868917226791382, 'timestamp': '2025-09-30 22:14:05.785234', 'step': 1522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:05.817173', 'step': 1522, 'epoch': 1} {'type': 'loss', 'content': 0.19170646369457245, 'timestamp': '2025-09-30 22:14:05.820153', 'step': 1523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:05.857938', 'step': 1523, 'epoch': 1} {'type': 'loss', 'content': 0.20797482132911682, 'timestamp': '2025-09-30 22:14:05.882516', 'step': 1524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:05.913831', 'step': 1524, 'epoch': 1} {'type': 'loss', 'content': 0.21239855885505676, 'timestamp': '2025-09-30 22:14:05.917661', 'step': 1525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:05.948360', 'step': 1525, 'epoch': 1} {'type': 'loss', 'content': 0.20519129931926727, 'timestamp': '2025-09-30 22:14:05.950750', 'step': 1526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:05.980808', 'step': 1526, 'epoch': 1} {'type': 'loss', 'content': 0.23214635252952576, 'timestamp': '2025-09-30 22:14:05.984093', 'step': 1527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:06.021221', 'step': 1527, 'epoch': 1} {'type': 'loss', 'content': 0.12040869891643524, 'timestamp': '2025-09-30 22:14:06.044960', 'step': 1528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:06.075084', 'step': 1528, 'epoch': 1} {'type': 'loss', 'content': 0.2050832360982895, 'timestamp': '2025-09-30 22:14:06.077359', 'step': 1529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.113918', 'step': 1529, 'epoch': 1} {'type': 'loss', 'content': 0.14790208637714386, 'timestamp': '2025-09-30 22:14:06.116183', 'step': 1530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.146416', 'step': 1530, 'epoch': 1} {'type': 'loss', 'content': 0.15314576029777527, 'timestamp': '2025-09-30 22:14:06.148898', 'step': 1531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.179354', 'step': 1531, 'epoch': 1} {'type': 'loss', 'content': 0.2203534096479416, 'timestamp': '2025-09-30 22:14:06.202970', 'step': 1532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:06.232536', 'step': 1532, 'epoch': 1} {'type': 'loss', 'content': 0.16854624450206757, 'timestamp': '2025-09-30 22:14:06.235390', 'step': 1533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.266910', 'step': 1533, 'epoch': 1} {'type': 'loss', 'content': 0.14577755331993103, 'timestamp': '2025-09-30 22:14:06.269434', 'step': 1534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:06.298731', 'step': 1534, 'epoch': 1} {'type': 'loss', 'content': 0.2484736293554306, 'timestamp': '2025-09-30 22:14:06.301605', 'step': 1535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:06.332276', 'step': 1535, 'epoch': 1} {'type': 'loss', 'content': 0.0934491902589798, 'timestamp': '2025-09-30 22:14:06.357256', 'step': 1536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:06.388952', 'step': 1536, 'epoch': 1} {'type': 'loss', 'content': 0.12947586178779602, 'timestamp': '2025-09-30 22:14:06.390992', 'step': 1537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:06.421478', 'step': 1537, 'epoch': 1} {'type': 'loss', 'content': 0.2698758542537689, 'timestamp': '2025-09-30 22:14:06.424040', 'step': 1538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.454511', 'step': 1538, 'epoch': 1} {'type': 'loss', 'content': 0.18268056213855743, 'timestamp': '2025-09-30 22:14:06.456755', 'step': 1539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:06.487102', 'step': 1539, 'epoch': 1} {'type': 'loss', 'content': 0.18259239196777344, 'timestamp': '2025-09-30 22:14:06.510898', 'step': 1540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.541329', 'step': 1540, 'epoch': 1} {'type': 'loss', 'content': 0.25216564536094666, 'timestamp': '2025-09-30 22:14:06.544517', 'step': 1541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:06.576366', 'step': 1541, 'epoch': 1} {'type': 'loss', 'content': 0.2096002846956253, 'timestamp': '2025-09-30 22:14:06.579002', 'step': 1542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.609697', 'step': 1542, 'epoch': 1} {'type': 'loss', 'content': 0.10051213204860687, 'timestamp': '2025-09-30 22:14:06.611965', 'step': 1543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.642049', 'step': 1543, 'epoch': 1} {'type': 'loss', 'content': 0.1239510104060173, 'timestamp': '2025-09-30 22:14:06.666394', 'step': 1544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.696506', 'step': 1544, 'epoch': 1} {'type': 'loss', 'content': 0.3138977885246277, 'timestamp': '2025-09-30 22:14:06.698958', 'step': 1545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.728995', 'step': 1545, 'epoch': 1} {'type': 'loss', 'content': 0.12130320817232132, 'timestamp': '2025-09-30 22:14:06.731925', 'step': 1546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:06.762807', 'step': 1546, 'epoch': 1} {'type': 'loss', 'content': 0.1015641987323761, 'timestamp': '2025-09-30 22:14:06.765921', 'step': 1547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:06.796617', 'step': 1547, 'epoch': 1} {'type': 'loss', 'content': 0.20035922527313232, 'timestamp': '2025-09-30 22:14:06.820185', 'step': 1548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:06.850872', 'step': 1548, 'epoch': 1} {'type': 'loss', 'content': 0.17206759750843048, 'timestamp': '2025-09-30 22:14:06.853795', 'step': 1549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:06.884510', 'step': 1549, 'epoch': 1} {'type': 'loss', 'content': 0.12008413672447205, 'timestamp': '2025-09-30 22:14:06.887471', 'step': 1550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:06.918129', 'step': 1550, 'epoch': 1} {'type': 'loss', 'content': 0.1421329528093338, 'timestamp': '2025-09-30 22:14:06.920576', 'step': 1551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:06.950715', 'step': 1551, 'epoch': 1} {'type': 'loss', 'content': 0.10390856117010117, 'timestamp': '2025-09-30 22:14:06.974284', 'step': 1552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.006109', 'step': 1552, 'epoch': 1} {'type': 'loss', 'content': 0.18400679528713226, 'timestamp': '2025-09-30 22:14:07.008628', 'step': 1553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.047190', 'step': 1553, 'epoch': 1} {'type': 'loss', 'content': 0.175434872508049, 'timestamp': '2025-09-30 22:14:07.049352', 'step': 1554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.079039', 'step': 1554, 'epoch': 1} {'type': 'loss', 'content': 0.3049333989620209, 'timestamp': '2025-09-30 22:14:07.081226', 'step': 1555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.113013', 'step': 1555, 'epoch': 1} {'type': 'loss', 'content': 0.1823868453502655, 'timestamp': '2025-09-30 22:14:07.137023', 'step': 1556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.167272', 'step': 1556, 'epoch': 1} {'type': 'loss', 'content': 0.1924741119146347, 'timestamp': '2025-09-30 22:14:07.169820', 'step': 1557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.201822', 'step': 1557, 'epoch': 1} {'type': 'loss', 'content': 0.1380549520254135, 'timestamp': '2025-09-30 22:14:07.205349', 'step': 1558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:07.236346', 'step': 1558, 'epoch': 1} {'type': 'loss', 'content': 0.18994975090026855, 'timestamp': '2025-09-30 22:14:07.238693', 'step': 1559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.270156', 'step': 1559, 'epoch': 1} {'type': 'loss', 'content': 0.2097051590681076, 'timestamp': '2025-09-30 22:14:07.293813', 'step': 1560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.324179', 'step': 1560, 'epoch': 1} {'type': 'loss', 'content': 0.21081846952438354, 'timestamp': '2025-09-30 22:14:07.326913', 'step': 1561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.357989', 'step': 1561, 'epoch': 1} {'type': 'loss', 'content': 0.1827659159898758, 'timestamp': '2025-09-30 22:14:07.363540', 'step': 1562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:07.395597', 'step': 1562, 'epoch': 1} {'type': 'loss', 'content': 0.2002420276403427, 'timestamp': '2025-09-30 22:14:07.398056', 'step': 1563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.429356', 'step': 1563, 'epoch': 1} {'type': 'loss', 'content': 0.15280543267726898, 'timestamp': '2025-09-30 22:14:07.452972', 'step': 1564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.483365', 'step': 1564, 'epoch': 1} {'type': 'loss', 'content': 0.15575741231441498, 'timestamp': '2025-09-30 22:14:07.489665', 'step': 1565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:07.520345', 'step': 1565, 'epoch': 1} {'type': 'loss', 'content': 0.2382345050573349, 'timestamp': '2025-09-30 22:14:07.522809', 'step': 1566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:07.553760', 'step': 1566, 'epoch': 1} {'type': 'loss', 'content': 0.10205237567424774, 'timestamp': '2025-09-30 22:14:07.556999', 'step': 1567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.587785', 'step': 1567, 'epoch': 1} {'type': 'loss', 'content': 0.2466157227754593, 'timestamp': '2025-09-30 22:14:07.611833', 'step': 1568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:07.642410', 'step': 1568, 'epoch': 1} {'type': 'loss', 'content': 0.14936938881874084, 'timestamp': '2025-09-30 22:14:07.646602', 'step': 1569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.677873', 'step': 1569, 'epoch': 1} {'type': 'loss', 'content': 0.19759249687194824, 'timestamp': '2025-09-30 22:14:07.680544', 'step': 1570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:07.712170', 'step': 1570, 'epoch': 1} {'type': 'loss', 'content': 0.1351178139448166, 'timestamp': '2025-09-30 22:14:07.715275', 'step': 1571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.745337', 'step': 1571, 'epoch': 1} {'type': 'loss', 'content': 0.3152213990688324, 'timestamp': '2025-09-30 22:14:07.769766', 'step': 1572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:07.799623', 'step': 1572, 'epoch': 1} {'type': 'loss', 'content': 0.14720439910888672, 'timestamp': '2025-09-30 22:14:07.802257', 'step': 1573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:14:07.833804', 'step': 1573, 'epoch': 1} {'type': 'loss', 'content': 0.10120175033807755, 'timestamp': '2025-09-30 22:14:07.838047', 'step': 1574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.868757', 'step': 1574, 'epoch': 1} {'type': 'loss', 'content': 0.1933673620223999, 'timestamp': '2025-09-30 22:14:07.872132', 'step': 1575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:07.903079', 'step': 1575, 'epoch': 1} {'type': 'loss', 'content': 0.19050781428813934, 'timestamp': '2025-09-30 22:14:07.926549', 'step': 1576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.956977', 'step': 1576, 'epoch': 1} {'type': 'loss', 'content': 0.17985020577907562, 'timestamp': '2025-09-30 22:14:07.959123', 'step': 1577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:07.989939', 'step': 1577, 'epoch': 1} {'type': 'loss', 'content': 0.1468070149421692, 'timestamp': '2025-09-30 22:14:07.993326', 'step': 1578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.024136', 'step': 1578, 'epoch': 1} {'type': 'loss', 'content': 0.1655348837375641, 'timestamp': '2025-09-30 22:14:08.027146', 'step': 1579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.057907', 'step': 1579, 'epoch': 1} {'type': 'loss', 'content': 0.20551109313964844, 'timestamp': '2025-09-30 22:14:08.082276', 'step': 1580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:08.113267', 'step': 1580, 'epoch': 1} {'type': 'loss', 'content': 0.13537120819091797, 'timestamp': '2025-09-30 22:14:08.116770', 'step': 1581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.158010', 'step': 1581, 'epoch': 1} {'type': 'loss', 'content': 0.20588456094264984, 'timestamp': '2025-09-30 22:14:08.161546', 'step': 1582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.193430', 'step': 1582, 'epoch': 1} {'type': 'loss', 'content': 0.21967774629592896, 'timestamp': '2025-09-30 22:14:08.197306', 'step': 1583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.229031', 'step': 1583, 'epoch': 1} {'type': 'loss', 'content': 0.175260990858078, 'timestamp': '2025-09-30 22:14:08.252855', 'step': 1584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.292320', 'step': 1584, 'epoch': 1} {'type': 'loss', 'content': 0.1456373780965805, 'timestamp': '2025-09-30 22:14:08.296234', 'step': 1585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.329074', 'step': 1585, 'epoch': 1} {'type': 'loss', 'content': 0.1490357518196106, 'timestamp': '2025-09-30 22:14:08.331802', 'step': 1586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.362356', 'step': 1586, 'epoch': 1} {'type': 'loss', 'content': 0.2739621102809906, 'timestamp': '2025-09-30 22:14:08.370048', 'step': 1587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.400855', 'step': 1587, 'epoch': 1} {'type': 'loss', 'content': 0.16455955803394318, 'timestamp': '2025-09-30 22:14:08.431622', 'step': 1588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.462537', 'step': 1588, 'epoch': 1} {'type': 'loss', 'content': 0.13593223690986633, 'timestamp': '2025-09-30 22:14:08.466927', 'step': 1589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.498293', 'step': 1589, 'epoch': 1} {'type': 'loss', 'content': 0.21985235810279846, 'timestamp': '2025-09-30 22:14:08.501015', 'step': 1590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.537333', 'step': 1590, 'epoch': 1} {'type': 'loss', 'content': 0.17453736066818237, 'timestamp': '2025-09-30 22:14:08.540929', 'step': 1591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.572486', 'step': 1591, 'epoch': 1} {'type': 'loss', 'content': 0.23489001393318176, 'timestamp': '2025-09-30 22:14:08.597529', 'step': 1592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.628757', 'step': 1592, 'epoch': 1} {'type': 'loss', 'content': 0.15001049637794495, 'timestamp': '2025-09-30 22:14:08.632680', 'step': 1593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.662824', 'step': 1593, 'epoch': 1} {'type': 'loss', 'content': 0.2941732108592987, 'timestamp': '2025-09-30 22:14:08.668537', 'step': 1594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.705408', 'step': 1594, 'epoch': 1} {'type': 'loss', 'content': 0.1603616625070572, 'timestamp': '2025-09-30 22:14:08.709542', 'step': 1595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.742083', 'step': 1595, 'epoch': 1} {'type': 'loss', 'content': 0.22574234008789062, 'timestamp': '2025-09-30 22:14:08.766011', 'step': 1596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.798134', 'step': 1596, 'epoch': 1} {'type': 'loss', 'content': 0.20388361811637878, 'timestamp': '2025-09-30 22:14:08.800566', 'step': 1597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.831228', 'step': 1597, 'epoch': 1} {'type': 'loss', 'content': 0.14534401893615723, 'timestamp': '2025-09-30 22:14:08.834573', 'step': 1598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:08.865302', 'step': 1598, 'epoch': 1} {'type': 'loss', 'content': 0.2010813057422638, 'timestamp': '2025-09-30 22:14:08.867455', 'step': 1599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:08.899686', 'step': 1599, 'epoch': 1} {'type': 'loss', 'content': 0.19847872853279114, 'timestamp': '2025-09-30 22:14:08.923354', 'step': 1600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.954063', 'step': 1600, 'epoch': 1} {'type': 'loss', 'content': 0.16709809005260468, 'timestamp': '2025-09-30 22:14:08.956268', 'step': 1601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:08.986435', 'step': 1601, 'epoch': 1} {'type': 'loss', 'content': 0.1526934951543808, 'timestamp': '2025-09-30 22:14:08.989302', 'step': 1602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:14:09.020282', 'step': 1602, 'epoch': 1} {'type': 'loss', 'content': 0.1321025788784027, 'timestamp': '2025-09-30 22:14:09.024594', 'step': 1603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:09.064437', 'step': 1603, 'epoch': 1} {'type': 'loss', 'content': 0.17466500401496887, 'timestamp': '2025-09-30 22:14:09.089078', 'step': 1604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.119621', 'step': 1604, 'epoch': 1} {'type': 'loss', 'content': 0.15133482217788696, 'timestamp': '2025-09-30 22:14:09.122533', 'step': 1605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:09.153494', 'step': 1605, 'epoch': 1} {'type': 'loss', 'content': 0.2156778872013092, 'timestamp': '2025-09-30 22:14:09.155364', 'step': 1606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:09.185645', 'step': 1606, 'epoch': 1} {'type': 'loss', 'content': 0.10665086656808853, 'timestamp': '2025-09-30 22:14:09.188627', 'step': 1607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:09.219312', 'step': 1607, 'epoch': 1} {'type': 'loss', 'content': 0.19857902824878693, 'timestamp': '2025-09-30 22:14:09.243290', 'step': 1608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:09.273486', 'step': 1608, 'epoch': 1} {'type': 'loss', 'content': 0.1850428432226181, 'timestamp': '2025-09-30 22:14:09.276109', 'step': 1609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:09.318488', 'step': 1609, 'epoch': 1} {'type': 'loss', 'content': 0.21460111439228058, 'timestamp': '2025-09-30 22:14:09.326051', 'step': 1610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:09.356587', 'step': 1610, 'epoch': 1} {'type': 'loss', 'content': 0.2215723842382431, 'timestamp': '2025-09-30 22:14:09.359548', 'step': 1611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:09.391269', 'step': 1611, 'epoch': 1} {'type': 'loss', 'content': 0.22779631614685059, 'timestamp': '2025-09-30 22:14:09.415158', 'step': 1612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.445207', 'step': 1612, 'epoch': 1} {'type': 'loss', 'content': 0.1365775167942047, 'timestamp': '2025-09-30 22:14:09.448544', 'step': 1613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:09.480574', 'step': 1613, 'epoch': 1} {'type': 'loss', 'content': 0.15251362323760986, 'timestamp': '2025-09-30 22:14:09.483565', 'step': 1614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.515398', 'step': 1614, 'epoch': 1} {'type': 'loss', 'content': 0.2876754701137543, 'timestamp': '2025-09-30 22:14:09.528020', 'step': 1615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.558041', 'step': 1615, 'epoch': 1} {'type': 'loss', 'content': 0.14426718652248383, 'timestamp': '2025-09-30 22:14:09.591927', 'step': 1616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.622191', 'step': 1616, 'epoch': 1} {'type': 'loss', 'content': 0.21279621124267578, 'timestamp': '2025-09-30 22:14:09.624804', 'step': 1617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.654764', 'step': 1617, 'epoch': 1} {'type': 'loss', 'content': 0.1921006143093109, 'timestamp': '2025-09-30 22:14:09.657880', 'step': 1618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:09.688572', 'step': 1618, 'epoch': 1} {'type': 'loss', 'content': 0.18976512551307678, 'timestamp': '2025-09-30 22:14:09.691473', 'step': 1619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:09.722964', 'step': 1619, 'epoch': 1} {'type': 'loss', 'content': 0.14158526062965393, 'timestamp': '2025-09-30 22:14:09.746334', 'step': 1620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:09.787420', 'step': 1620, 'epoch': 1} {'type': 'loss', 'content': 0.2441226840019226, 'timestamp': '2025-09-30 22:14:09.790294', 'step': 1621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:09.820678', 'step': 1621, 'epoch': 1} {'type': 'loss', 'content': 0.19854572415351868, 'timestamp': '2025-09-30 22:14:09.822920', 'step': 1622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:09.854180', 'step': 1622, 'epoch': 1} {'type': 'loss', 'content': 0.1872665137052536, 'timestamp': '2025-09-30 22:14:09.856996', 'step': 1623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.886761', 'step': 1623, 'epoch': 1} {'type': 'loss', 'content': 0.3245084285736084, 'timestamp': '2025-09-30 22:14:09.911381', 'step': 1624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.942542', 'step': 1624, 'epoch': 1} {'type': 'loss', 'content': 0.1478944569826126, 'timestamp': '2025-09-30 22:14:09.945860', 'step': 1625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:09.983550', 'step': 1625, 'epoch': 1} {'type': 'loss', 'content': 0.08515326678752899, 'timestamp': '2025-09-30 22:14:09.987064', 'step': 1626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:10.018332', 'step': 1626, 'epoch': 1} {'type': 'loss', 'content': 0.14668351411819458, 'timestamp': '2025-09-30 22:14:10.020964', 'step': 1627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:10.051976', 'step': 1627, 'epoch': 1} {'type': 'loss', 'content': 0.1500556319952011, 'timestamp': '2025-09-30 22:14:10.076903', 'step': 1628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:10.107363', 'step': 1628, 'epoch': 1} {'type': 'loss', 'content': 0.1266430914402008, 'timestamp': '2025-09-30 22:14:10.110351', 'step': 1629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:10.140949', 'step': 1629, 'epoch': 1} {'type': 'loss', 'content': 0.1462351530790329, 'timestamp': '2025-09-30 22:14:10.145307', 'step': 1630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.176591', 'step': 1630, 'epoch': 1} {'type': 'loss', 'content': 0.11281568557024002, 'timestamp': '2025-09-30 22:14:10.179512', 'step': 1631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:10.210409', 'step': 1631, 'epoch': 1} {'type': 'loss', 'content': 0.17488956451416016, 'timestamp': '2025-09-30 22:14:10.234794', 'step': 1632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:10.265525', 'step': 1632, 'epoch': 1} {'type': 'loss', 'content': 0.16350452601909637, 'timestamp': '2025-09-30 22:14:10.268033', 'step': 1633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:10.304273', 'step': 1633, 'epoch': 1} {'type': 'loss', 'content': 0.1509820967912674, 'timestamp': '2025-09-30 22:14:10.308561', 'step': 1634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:10.339238', 'step': 1634, 'epoch': 1} {'type': 'loss', 'content': 0.17958404123783112, 'timestamp': '2025-09-30 22:14:10.341558', 'step': 1635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.372100', 'step': 1635, 'epoch': 1} {'type': 'loss', 'content': 0.1795811951160431, 'timestamp': '2025-09-30 22:14:10.396642', 'step': 1636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.426772', 'step': 1636, 'epoch': 1} {'type': 'loss', 'content': 0.23221324384212494, 'timestamp': '2025-09-30 22:14:10.429355', 'step': 1637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.459785', 'step': 1637, 'epoch': 1} {'type': 'loss', 'content': 0.32110685110092163, 'timestamp': '2025-09-30 22:14:10.462223', 'step': 1638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:10.492252', 'step': 1638, 'epoch': 1} {'type': 'loss', 'content': 0.26297736167907715, 'timestamp': '2025-09-30 22:14:10.495441', 'step': 1639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:10.524721', 'step': 1639, 'epoch': 1} {'type': 'loss', 'content': 0.20887884497642517, 'timestamp': '2025-09-30 22:14:10.548896', 'step': 1640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:10.579552', 'step': 1640, 'epoch': 1} {'type': 'loss', 'content': 0.2019655555486679, 'timestamp': '2025-09-30 22:14:10.581762', 'step': 1641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:10.612797', 'step': 1641, 'epoch': 1} {'type': 'loss', 'content': 0.13631930947303772, 'timestamp': '2025-09-30 22:14:10.615162', 'step': 1642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.645757', 'step': 1642, 'epoch': 1} {'type': 'loss', 'content': 0.15873241424560547, 'timestamp': '2025-09-30 22:14:10.648356', 'step': 1643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:10.681501', 'step': 1643, 'epoch': 1} {'type': 'loss', 'content': 0.2630627155303955, 'timestamp': '2025-09-30 22:14:10.705427', 'step': 1644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:10.736743', 'step': 1644, 'epoch': 1} {'type': 'loss', 'content': 0.11017175018787384, 'timestamp': '2025-09-30 22:14:10.738802', 'step': 1645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.770548', 'step': 1645, 'epoch': 1} {'type': 'loss', 'content': 0.12093441188335419, 'timestamp': '2025-09-30 22:14:10.774229', 'step': 1646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.805599', 'step': 1646, 'epoch': 1} {'type': 'loss', 'content': 0.12749126553535461, 'timestamp': '2025-09-30 22:14:10.808661', 'step': 1647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:10.839595', 'step': 1647, 'epoch': 1} {'type': 'loss', 'content': 0.1684044450521469, 'timestamp': '2025-09-30 22:14:10.864696', 'step': 1648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.895102', 'step': 1648, 'epoch': 1} {'type': 'loss', 'content': 0.17059572041034698, 'timestamp': '2025-09-30 22:14:10.897620', 'step': 1649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.928644', 'step': 1649, 'epoch': 1} {'type': 'loss', 'content': 0.18239766359329224, 'timestamp': '2025-09-30 22:14:10.931853', 'step': 1650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.962678', 'step': 1650, 'epoch': 1} {'type': 'loss', 'content': 0.17422634363174438, 'timestamp': '2025-09-30 22:14:10.964814', 'step': 1651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:10.995599', 'step': 1651, 'epoch': 1} {'type': 'loss', 'content': 0.11047057807445526, 'timestamp': '2025-09-30 22:14:11.020491', 'step': 1652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.052721', 'step': 1652, 'epoch': 1} {'type': 'loss', 'content': 0.2102941870689392, 'timestamp': '2025-09-30 22:14:11.055798', 'step': 1653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.086657', 'step': 1653, 'epoch': 1} {'type': 'loss', 'content': 0.12035458534955978, 'timestamp': '2025-09-30 22:14:11.089881', 'step': 1654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.120487', 'step': 1654, 'epoch': 1} {'type': 'loss', 'content': 0.20623302459716797, 'timestamp': '2025-09-30 22:14:11.123437', 'step': 1655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.154837', 'step': 1655, 'epoch': 1} {'type': 'loss', 'content': 0.3012793958187103, 'timestamp': '2025-09-30 22:14:11.179431', 'step': 1656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:11.211183', 'step': 1656, 'epoch': 1} {'type': 'loss', 'content': 0.16284586489200592, 'timestamp': '2025-09-30 22:14:11.214172', 'step': 1657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.245124', 'step': 1657, 'epoch': 1} {'type': 'loss', 'content': 0.11070868372917175, 'timestamp': '2025-09-30 22:14:11.247849', 'step': 1658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.278741', 'step': 1658, 'epoch': 1} {'type': 'loss', 'content': 0.1565137356519699, 'timestamp': '2025-09-30 22:14:11.284371', 'step': 1659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.318274', 'step': 1659, 'epoch': 1} {'type': 'loss', 'content': 0.14683975279331207, 'timestamp': '2025-09-30 22:14:11.344782', 'step': 1660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:11.377394', 'step': 1660, 'epoch': 1} {'type': 'loss', 'content': 0.14750084280967712, 'timestamp': '2025-09-30 22:14:11.379613', 'step': 1661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:11.410887', 'step': 1661, 'epoch': 1} {'type': 'loss', 'content': 0.20718203485012054, 'timestamp': '2025-09-30 22:14:11.417998', 'step': 1662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.447851', 'step': 1662, 'epoch': 1} {'type': 'loss', 'content': 0.12762603163719177, 'timestamp': '2025-09-30 22:14:11.450574', 'step': 1663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:11.482152', 'step': 1663, 'epoch': 1} {'type': 'loss', 'content': 0.12308492511510849, 'timestamp': '2025-09-30 22:14:11.506197', 'step': 1664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.536380', 'step': 1664, 'epoch': 1} {'type': 'loss', 'content': 0.12958522140979767, 'timestamp': '2025-09-30 22:14:11.538837', 'step': 1665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.569290', 'step': 1665, 'epoch': 1} {'type': 'loss', 'content': 0.1573982685804367, 'timestamp': '2025-09-30 22:14:11.571480', 'step': 1666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:11.608729', 'step': 1666, 'epoch': 1} {'type': 'loss', 'content': 0.19448621571063995, 'timestamp': '2025-09-30 22:14:11.611392', 'step': 1667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.642222', 'step': 1667, 'epoch': 1} {'type': 'loss', 'content': 0.20320050418376923, 'timestamp': '2025-09-30 22:14:11.666173', 'step': 1668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.698151', 'step': 1668, 'epoch': 1} {'type': 'loss', 'content': 0.11679939925670624, 'timestamp': '2025-09-30 22:14:11.700335', 'step': 1669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.731437', 'step': 1669, 'epoch': 1} {'type': 'loss', 'content': 0.1976546347141266, 'timestamp': '2025-09-30 22:14:11.734016', 'step': 1670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:11.764877', 'step': 1670, 'epoch': 1} {'type': 'loss', 'content': 0.12707051634788513, 'timestamp': '2025-09-30 22:14:11.767403', 'step': 1671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:11.798990', 'step': 1671, 'epoch': 1} {'type': 'loss', 'content': 0.20217430591583252, 'timestamp': '2025-09-30 22:14:11.823702', 'step': 1672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:11.854512', 'step': 1672, 'epoch': 1} {'type': 'loss', 'content': 0.18475501239299774, 'timestamp': '2025-09-30 22:14:11.857121', 'step': 1673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:11.889075', 'step': 1673, 'epoch': 1} {'type': 'loss', 'content': 0.16525238752365112, 'timestamp': '2025-09-30 22:14:11.891242', 'step': 1674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:11.921422', 'step': 1674, 'epoch': 1} {'type': 'loss', 'content': 0.21157029271125793, 'timestamp': '2025-09-30 22:14:11.923661', 'step': 1675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:11.954363', 'step': 1675, 'epoch': 1} {'type': 'loss', 'content': 0.16926081478595734, 'timestamp': '2025-09-30 22:14:11.978023', 'step': 1676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.008569', 'step': 1676, 'epoch': 1} {'type': 'loss', 'content': 0.1388055980205536, 'timestamp': '2025-09-30 22:14:12.010843', 'step': 1677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.042076', 'step': 1677, 'epoch': 1} {'type': 'loss', 'content': 0.15130984783172607, 'timestamp': '2025-09-30 22:14:12.044933', 'step': 1678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:14:12.076543', 'step': 1678, 'epoch': 1} {'type': 'loss', 'content': 0.15224996209144592, 'timestamp': '2025-09-30 22:14:12.083589', 'step': 1679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.114908', 'step': 1679, 'epoch': 1} {'type': 'loss', 'content': 0.18177452683448792, 'timestamp': '2025-09-30 22:14:12.143213', 'step': 1680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:12.180606', 'step': 1680, 'epoch': 1} {'type': 'loss', 'content': 0.1081276535987854, 'timestamp': '2025-09-30 22:14:12.183177', 'step': 1681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:12.214559', 'step': 1681, 'epoch': 1} {'type': 'loss', 'content': 0.12024299055337906, 'timestamp': '2025-09-30 22:14:12.218346', 'step': 1682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:12.249081', 'step': 1682, 'epoch': 1} {'type': 'loss', 'content': 0.16012577712535858, 'timestamp': '2025-09-30 22:14:12.251642', 'step': 1683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.293903', 'step': 1683, 'epoch': 1} {'type': 'loss', 'content': 0.2067670226097107, 'timestamp': '2025-09-30 22:14:12.328642', 'step': 1684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.361718', 'step': 1684, 'epoch': 1} {'type': 'loss', 'content': 0.33089789748191833, 'timestamp': '2025-09-30 22:14:12.376446', 'step': 1685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.418894', 'step': 1685, 'epoch': 1} {'type': 'loss', 'content': 0.18464075028896332, 'timestamp': '2025-09-30 22:14:12.425353', 'step': 1686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:12.459397', 'step': 1686, 'epoch': 1} {'type': 'loss', 'content': 0.2638137936592102, 'timestamp': '2025-09-30 22:14:12.465378', 'step': 1687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:12.498503', 'step': 1687, 'epoch': 1} {'type': 'loss', 'content': 0.10965298861265182, 'timestamp': '2025-09-30 22:14:12.526269', 'step': 1688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:12.572662', 'step': 1688, 'epoch': 1} {'type': 'loss', 'content': 0.13483136892318726, 'timestamp': '2025-09-30 22:14:12.577344', 'step': 1689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:12.610274', 'step': 1689, 'epoch': 1} {'type': 'loss', 'content': 0.24480335414409637, 'timestamp': '2025-09-30 22:14:12.614404', 'step': 1690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:12.656624', 'step': 1690, 'epoch': 1} {'type': 'loss', 'content': 0.31505921483039856, 'timestamp': '2025-09-30 22:14:12.670121', 'step': 1691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:12.703314', 'step': 1691, 'epoch': 1} {'type': 'loss', 'content': 0.14594215154647827, 'timestamp': '2025-09-30 22:14:12.727885', 'step': 1692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:12.759282', 'step': 1692, 'epoch': 1} {'type': 'loss', 'content': 0.24654412269592285, 'timestamp': '2025-09-30 22:14:12.770899', 'step': 1693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:12.802378', 'step': 1693, 'epoch': 1} {'type': 'loss', 'content': 0.146057590842247, 'timestamp': '2025-09-30 22:14:12.806017', 'step': 1694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:12.849896', 'step': 1694, 'epoch': 1} {'type': 'loss', 'content': 0.13230930268764496, 'timestamp': '2025-09-30 22:14:12.855019', 'step': 1695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:12.898810', 'step': 1695, 'epoch': 1} {'type': 'loss', 'content': 0.12946827709674835, 'timestamp': '2025-09-30 22:14:12.925485', 'step': 1696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:12.957582', 'step': 1696, 'epoch': 1} {'type': 'loss', 'content': 0.22823360562324524, 'timestamp': '2025-09-30 22:14:12.961635', 'step': 1697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:12.994844', 'step': 1697, 'epoch': 1} {'type': 'loss', 'content': 0.16795961558818817, 'timestamp': '2025-09-30 22:14:12.999356', 'step': 1698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:13.044099', 'step': 1698, 'epoch': 1} {'type': 'loss', 'content': 0.1620999574661255, 'timestamp': '2025-09-30 22:14:13.047572', 'step': 1699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:13.080104', 'step': 1699, 'epoch': 1} {'type': 'loss', 'content': 0.20484326779842377, 'timestamp': '2025-09-30 22:14:13.106223', 'step': 1700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:13.137712', 'step': 1700, 'epoch': 1} {'type': 'loss', 'content': 0.09670718759298325, 'timestamp': '2025-09-30 22:14:13.144475', 'step': 1701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:13.177991', 'step': 1701, 'epoch': 1} {'type': 'loss', 'content': 0.1632530838251114, 'timestamp': '2025-09-30 22:14:13.182997', 'step': 1702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:13.239374', 'step': 1702, 'epoch': 1} {'type': 'loss', 'content': 0.1482948511838913, 'timestamp': '2025-09-30 22:14:13.245538', 'step': 1703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:13.278816', 'step': 1703, 'epoch': 1} {'type': 'loss', 'content': 0.21175900101661682, 'timestamp': '2025-09-30 22:14:13.304891', 'step': 1704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:13.348196', 'step': 1704, 'epoch': 1} {'type': 'loss', 'content': 0.24046529829502106, 'timestamp': '2025-09-30 22:14:13.352123', 'step': 1705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:13.385925', 'step': 1705, 'epoch': 1} {'type': 'loss', 'content': 0.15800726413726807, 'timestamp': '2025-09-30 22:14:13.391124', 'step': 1706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:13.424312', 'step': 1706, 'epoch': 1} {'type': 'loss', 'content': 0.2245473712682724, 'timestamp': '2025-09-30 22:14:13.429203', 'step': 1707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:13.461154', 'step': 1707, 'epoch': 1} {'type': 'loss', 'content': 0.13969099521636963, 'timestamp': '2025-09-30 22:14:13.486284', 'step': 1708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:13.525184', 'step': 1708, 'epoch': 1} {'type': 'loss', 'content': 0.13914895057678223, 'timestamp': '2025-09-30 22:14:13.529295', 'step': 1709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:13.561085', 'step': 1709, 'epoch': 1} {'type': 'loss', 'content': 0.20662876963615417, 'timestamp': '2025-09-30 22:14:13.568418', 'step': 1710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:14:13.599794', 'step': 1710, 'epoch': 1} {'type': 'loss', 'content': 0.13148155808448792, 'timestamp': '2025-09-30 22:14:13.604030', 'step': 1711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:13.634882', 'step': 1711, 'epoch': 1} {'type': 'loss', 'content': 0.19917826354503632, 'timestamp': '2025-09-30 22:14:13.658627', 'step': 1712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:13.689730', 'step': 1712, 'epoch': 1} {'type': 'loss', 'content': 0.13121609389781952, 'timestamp': '2025-09-30 22:14:13.691977', 'step': 1713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:13.723430', 'step': 1713, 'epoch': 1} {'type': 'loss', 'content': 0.25365832448005676, 'timestamp': '2025-09-30 22:14:13.728269', 'step': 1714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:13.760082', 'step': 1714, 'epoch': 1} {'type': 'loss', 'content': 0.3044983446598053, 'timestamp': '2025-09-30 22:14:13.764269', 'step': 1715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:13.796985', 'step': 1715, 'epoch': 1} {'type': 'loss', 'content': 0.1548893004655838, 'timestamp': '2025-09-30 22:14:13.821203', 'step': 1716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:13.852503', 'step': 1716, 'epoch': 1} {'type': 'loss', 'content': 0.13415919244289398, 'timestamp': '2025-09-30 22:14:13.859867', 'step': 1717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:13.897261', 'step': 1717, 'epoch': 1} {'type': 'loss', 'content': 0.15565623342990875, 'timestamp': '2025-09-30 22:14:13.900904', 'step': 1718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:13.931716', 'step': 1718, 'epoch': 1} {'type': 'loss', 'content': 0.32051926851272583, 'timestamp': '2025-09-30 22:14:13.940803', 'step': 1719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:13.978591', 'step': 1719, 'epoch': 1} {'type': 'loss', 'content': 0.13362561166286469, 'timestamp': '2025-09-30 22:14:14.003468', 'step': 1720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.035026', 'step': 1720, 'epoch': 1} {'type': 'loss', 'content': 0.1831579953432083, 'timestamp': '2025-09-30 22:14:14.037898', 'step': 1721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:14.071992', 'step': 1721, 'epoch': 1} {'type': 'loss', 'content': 0.15500831604003906, 'timestamp': '2025-09-30 22:14:14.076498', 'step': 1722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:14.110327', 'step': 1722, 'epoch': 1} {'type': 'loss', 'content': 0.19156742095947266, 'timestamp': '2025-09-30 22:14:14.118563', 'step': 1723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.150897', 'step': 1723, 'epoch': 1} {'type': 'loss', 'content': 0.17672882974147797, 'timestamp': '2025-09-30 22:14:14.175156', 'step': 1724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:14.209225', 'step': 1724, 'epoch': 1} {'type': 'loss', 'content': 0.12066204100847244, 'timestamp': '2025-09-30 22:14:14.220445', 'step': 1725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:14.251289', 'step': 1725, 'epoch': 1} {'type': 'loss', 'content': 0.1579703986644745, 'timestamp': '2025-09-30 22:14:14.260123', 'step': 1726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.297427', 'step': 1726, 'epoch': 1} {'type': 'loss', 'content': 0.1668216586112976, 'timestamp': '2025-09-30 22:14:14.301057', 'step': 1727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.331330', 'step': 1727, 'epoch': 1} {'type': 'loss', 'content': 0.10401487350463867, 'timestamp': '2025-09-30 22:14:14.358282', 'step': 1728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.388185', 'step': 1728, 'epoch': 1} {'type': 'loss', 'content': 0.24453504383563995, 'timestamp': '2025-09-30 22:14:14.392179', 'step': 1729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.422436', 'step': 1729, 'epoch': 1} {'type': 'loss', 'content': 0.18431569635868073, 'timestamp': '2025-09-30 22:14:14.427387', 'step': 1730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.462125', 'step': 1730, 'epoch': 1} {'type': 'loss', 'content': 0.08914131671190262, 'timestamp': '2025-09-30 22:14:14.464950', 'step': 1731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.497541', 'step': 1731, 'epoch': 1} {'type': 'loss', 'content': 0.1299351304769516, 'timestamp': '2025-09-30 22:14:14.523826', 'step': 1732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:14.554962', 'step': 1732, 'epoch': 1} {'type': 'loss', 'content': 0.2708994150161743, 'timestamp': '2025-09-30 22:14:14.558813', 'step': 1733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:14.591555', 'step': 1733, 'epoch': 1} {'type': 'loss', 'content': 0.2079521119594574, 'timestamp': '2025-09-30 22:14:14.600637', 'step': 1734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.634836', 'step': 1734, 'epoch': 1} {'type': 'loss', 'content': 0.1933697611093521, 'timestamp': '2025-09-30 22:14:14.637633', 'step': 1735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.669301', 'step': 1735, 'epoch': 1} {'type': 'loss', 'content': 0.17944592237472534, 'timestamp': '2025-09-30 22:14:14.693266', 'step': 1736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.725504', 'step': 1736, 'epoch': 1} {'type': 'loss', 'content': 0.12368565797805786, 'timestamp': '2025-09-30 22:14:14.733787', 'step': 1737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:14.769801', 'step': 1737, 'epoch': 1} {'type': 'loss', 'content': 0.21578240394592285, 'timestamp': '2025-09-30 22:14:14.778708', 'step': 1738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.809442', 'step': 1738, 'epoch': 1} {'type': 'loss', 'content': 0.20313428342342377, 'timestamp': '2025-09-30 22:14:14.812636', 'step': 1739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:14.843182', 'step': 1739, 'epoch': 1} {'type': 'loss', 'content': 0.22866924107074738, 'timestamp': '2025-09-30 22:14:14.869641', 'step': 1740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:14.902283', 'step': 1740, 'epoch': 1} {'type': 'loss', 'content': 0.16956491768360138, 'timestamp': '2025-09-30 22:14:14.904476', 'step': 1741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:14.934180', 'step': 1741, 'epoch': 1} {'type': 'loss', 'content': 0.18304762244224548, 'timestamp': '2025-09-30 22:14:14.936802', 'step': 1742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:14.975174', 'step': 1742, 'epoch': 1} {'type': 'loss', 'content': 0.1711135357618332, 'timestamp': '2025-09-30 22:14:14.978129', 'step': 1743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.015095', 'step': 1743, 'epoch': 1} {'type': 'loss', 'content': 0.17466890811920166, 'timestamp': '2025-09-30 22:14:15.042063', 'step': 1744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:15.079257', 'step': 1744, 'epoch': 1} {'type': 'loss', 'content': 0.17961667478084564, 'timestamp': '2025-09-30 22:14:15.085170', 'step': 1745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:15.120594', 'step': 1745, 'epoch': 1} {'type': 'loss', 'content': 0.1414010375738144, 'timestamp': '2025-09-30 22:14:15.125369', 'step': 1746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.163738', 'step': 1746, 'epoch': 1} {'type': 'loss', 'content': 0.14938588440418243, 'timestamp': '2025-09-30 22:14:15.170137', 'step': 1747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.202567', 'step': 1747, 'epoch': 1} {'type': 'loss', 'content': 0.1795821189880371, 'timestamp': '2025-09-30 22:14:15.229608', 'step': 1748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.266572', 'step': 1748, 'epoch': 1} {'type': 'loss', 'content': 0.18348993360996246, 'timestamp': '2025-09-30 22:14:15.271377', 'step': 1749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.303453', 'step': 1749, 'epoch': 1} {'type': 'loss', 'content': 0.14826437830924988, 'timestamp': '2025-09-30 22:14:15.308722', 'step': 1750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.344890', 'step': 1750, 'epoch': 1} {'type': 'loss', 'content': 0.1531512588262558, 'timestamp': '2025-09-30 22:14:15.348676', 'step': 1751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.383820', 'step': 1751, 'epoch': 1} {'type': 'loss', 'content': 0.2710922658443451, 'timestamp': '2025-09-30 22:14:15.409208', 'step': 1752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.452115', 'step': 1752, 'epoch': 1} {'type': 'loss', 'content': 0.1901238113641739, 'timestamp': '2025-09-30 22:14:15.456085', 'step': 1753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.488074', 'step': 1753, 'epoch': 1} {'type': 'loss', 'content': 0.23502112925052643, 'timestamp': '2025-09-30 22:14:15.491980', 'step': 1754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.522140', 'step': 1754, 'epoch': 1} {'type': 'loss', 'content': 0.10206476598978043, 'timestamp': '2025-09-30 22:14:15.524273', 'step': 1755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.571001', 'step': 1755, 'epoch': 1} {'type': 'loss', 'content': 0.10445097833871841, 'timestamp': '2025-09-30 22:14:15.598356', 'step': 1756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:15.630086', 'step': 1756, 'epoch': 1} {'type': 'loss', 'content': 0.13694404065608978, 'timestamp': '2025-09-30 22:14:15.638673', 'step': 1757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.669563', 'step': 1757, 'epoch': 1} {'type': 'loss', 'content': 0.22545284032821655, 'timestamp': '2025-09-30 22:14:15.678663', 'step': 1758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.713642', 'step': 1758, 'epoch': 1} {'type': 'loss', 'content': 0.13367384672164917, 'timestamp': '2025-09-30 22:14:15.722921', 'step': 1759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:15.757416', 'step': 1759, 'epoch': 1} {'type': 'loss', 'content': 0.13061559200286865, 'timestamp': '2025-09-30 22:14:15.784270', 'step': 1760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.822995', 'step': 1760, 'epoch': 1} {'type': 'loss', 'content': 0.13031929731369019, 'timestamp': '2025-09-30 22:14:15.828624', 'step': 1761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:15.860781', 'step': 1761, 'epoch': 1} {'type': 'loss', 'content': 0.18132124841213226, 'timestamp': '2025-09-30 22:14:15.870293', 'step': 1762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.903796', 'step': 1762, 'epoch': 1} {'type': 'loss', 'content': 0.1752740442752838, 'timestamp': '2025-09-30 22:14:15.906379', 'step': 1763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:15.939160', 'step': 1763, 'epoch': 1} {'type': 'loss', 'content': 0.21969343721866608, 'timestamp': '2025-09-30 22:14:15.969745', 'step': 1764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:16.000060', 'step': 1764, 'epoch': 1} {'type': 'loss', 'content': 0.13264966011047363, 'timestamp': '2025-09-30 22:14:16.008312', 'step': 1765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:16.039178', 'step': 1765, 'epoch': 1} {'type': 'loss', 'content': 0.13588735461235046, 'timestamp': '2025-09-30 22:14:16.044156', 'step': 1766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.074953', 'step': 1766, 'epoch': 1} {'type': 'loss', 'content': 0.12087012827396393, 'timestamp': '2025-09-30 22:14:16.081639', 'step': 1767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:16.118673', 'step': 1767, 'epoch': 1} {'type': 'loss', 'content': 0.16586598753929138, 'timestamp': '2025-09-30 22:14:16.144753', 'step': 1768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.185125', 'step': 1768, 'epoch': 1} {'type': 'loss', 'content': 0.14284254610538483, 'timestamp': '2025-09-30 22:14:16.191378', 'step': 1769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:16.235695', 'step': 1769, 'epoch': 1} {'type': 'loss', 'content': 0.19513632357120514, 'timestamp': '2025-09-30 22:14:16.238007', 'step': 1770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:16.273386', 'step': 1770, 'epoch': 1} {'type': 'loss', 'content': 0.1507730633020401, 'timestamp': '2025-09-30 22:14:16.280950', 'step': 1771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:16.317477', 'step': 1771, 'epoch': 1} {'type': 'loss', 'content': 0.16838932037353516, 'timestamp': '2025-09-30 22:14:16.342523', 'step': 1772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.376330', 'step': 1772, 'epoch': 1} {'type': 'loss', 'content': 0.2545446753501892, 'timestamp': '2025-09-30 22:14:16.378919', 'step': 1773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:16.414942', 'step': 1773, 'epoch': 1} {'type': 'loss', 'content': 0.23670636117458344, 'timestamp': '2025-09-30 22:14:16.418713', 'step': 1774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:16.453646', 'step': 1774, 'epoch': 1} {'type': 'loss', 'content': 0.22038109600543976, 'timestamp': '2025-09-30 22:14:16.458089', 'step': 1775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.489176', 'step': 1775, 'epoch': 1} {'type': 'loss', 'content': 0.16316191852092743, 'timestamp': '2025-09-30 22:14:16.518710', 'step': 1776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.549101', 'step': 1776, 'epoch': 1} {'type': 'loss', 'content': 0.24514837563037872, 'timestamp': '2025-09-30 22:14:16.551817', 'step': 1777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.583163', 'step': 1777, 'epoch': 1} {'type': 'loss', 'content': 0.19012820720672607, 'timestamp': '2025-09-30 22:14:16.586588', 'step': 1778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.620677', 'step': 1778, 'epoch': 1} {'type': 'loss', 'content': 0.21282756328582764, 'timestamp': '2025-09-30 22:14:16.624115', 'step': 1779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:16.655096', 'step': 1779, 'epoch': 1} {'type': 'loss', 'content': 0.20411647856235504, 'timestamp': '2025-09-30 22:14:16.679447', 'step': 1780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:16.710750', 'step': 1780, 'epoch': 1} {'type': 'loss', 'content': 0.27593210339546204, 'timestamp': '2025-09-30 22:14:16.716877', 'step': 1781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.750810', 'step': 1781, 'epoch': 1} {'type': 'loss', 'content': 0.164380744099617, 'timestamp': '2025-09-30 22:14:16.754819', 'step': 1782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:16.785751', 'step': 1782, 'epoch': 1} {'type': 'loss', 'content': 0.17453603446483612, 'timestamp': '2025-09-30 22:14:16.788396', 'step': 1783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:16.821387', 'step': 1783, 'epoch': 1} {'type': 'loss', 'content': 0.2345484495162964, 'timestamp': '2025-09-30 22:14:16.848310', 'step': 1784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:16.879090', 'step': 1784, 'epoch': 1} {'type': 'loss', 'content': 0.1510297656059265, 'timestamp': '2025-09-30 22:14:16.887462', 'step': 1785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:16.921598', 'step': 1785, 'epoch': 1} {'type': 'loss', 'content': 0.0759645476937294, 'timestamp': '2025-09-30 22:14:16.924717', 'step': 1786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:16.955795', 'step': 1786, 'epoch': 1} {'type': 'loss', 'content': 0.18541386723518372, 'timestamp': '2025-09-30 22:14:16.961180', 'step': 1787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:16.998646', 'step': 1787, 'epoch': 1} {'type': 'loss', 'content': 0.11270935088396072, 'timestamp': '2025-09-30 22:14:17.025900', 'step': 1788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.059610', 'step': 1788, 'epoch': 1} {'type': 'loss', 'content': 0.2268390953540802, 'timestamp': '2025-09-30 22:14:17.066446', 'step': 1789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:17.098129', 'step': 1789, 'epoch': 1} {'type': 'loss', 'content': 0.2187349498271942, 'timestamp': '2025-09-30 22:14:17.105502', 'step': 1790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:17.137578', 'step': 1790, 'epoch': 1} {'type': 'loss', 'content': 0.14341287314891815, 'timestamp': '2025-09-30 22:14:17.140792', 'step': 1791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.171206', 'step': 1791, 'epoch': 1} {'type': 'loss', 'content': 0.19908641278743744, 'timestamp': '2025-09-30 22:14:17.199776', 'step': 1792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.232814', 'step': 1792, 'epoch': 1} {'type': 'loss', 'content': 0.14391584694385529, 'timestamp': '2025-09-30 22:14:17.235729', 'step': 1793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.266480', 'step': 1793, 'epoch': 1} {'type': 'loss', 'content': 0.2052241414785385, 'timestamp': '2025-09-30 22:14:17.269116', 'step': 1794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:17.299441', 'step': 1794, 'epoch': 1} {'type': 'loss', 'content': 0.10244643688201904, 'timestamp': '2025-09-30 22:14:17.304604', 'step': 1795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.337787', 'step': 1795, 'epoch': 1} {'type': 'loss', 'content': 0.2117956429719925, 'timestamp': '2025-09-30 22:14:17.361970', 'step': 1796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.391849', 'step': 1796, 'epoch': 1} {'type': 'loss', 'content': 0.26367032527923584, 'timestamp': '2025-09-30 22:14:17.394323', 'step': 1797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:17.426777', 'step': 1797, 'epoch': 1} {'type': 'loss', 'content': 0.2647322714328766, 'timestamp': '2025-09-30 22:14:17.429286', 'step': 1798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.461650', 'step': 1798, 'epoch': 1} {'type': 'loss', 'content': 0.2116299271583557, 'timestamp': '2025-09-30 22:14:17.464207', 'step': 1799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:17.497836', 'step': 1799, 'epoch': 1} {'type': 'loss', 'content': 0.1667552888393402, 'timestamp': '2025-09-30 22:14:17.522876', 'step': 1800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:17.555068', 'step': 1800, 'epoch': 1} {'type': 'loss', 'content': 0.17269332706928253, 'timestamp': '2025-09-30 22:14:17.558596', 'step': 1801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:17.590264', 'step': 1801, 'epoch': 1} {'type': 'loss', 'content': 0.09267641603946686, 'timestamp': '2025-09-30 22:14:17.599058', 'step': 1802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:17.636584', 'step': 1802, 'epoch': 1} {'type': 'loss', 'content': 0.12981104850769043, 'timestamp': '2025-09-30 22:14:17.645570', 'step': 1803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:17.680757', 'step': 1803, 'epoch': 1} {'type': 'loss', 'content': 0.17066222429275513, 'timestamp': '2025-09-30 22:14:17.705045', 'step': 1804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:17.736458', 'step': 1804, 'epoch': 1} {'type': 'loss', 'content': 0.19474326074123383, 'timestamp': '2025-09-30 22:14:17.738547', 'step': 1805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:17.772276', 'step': 1805, 'epoch': 1} {'type': 'loss', 'content': 0.21253643929958344, 'timestamp': '2025-09-30 22:14:17.777831', 'step': 1806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:17.811653', 'step': 1806, 'epoch': 1} {'type': 'loss', 'content': 0.15569987893104553, 'timestamp': '2025-09-30 22:14:17.817440', 'step': 1807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:17.850647', 'step': 1807, 'epoch': 1} {'type': 'loss', 'content': 0.20627626776695251, 'timestamp': '2025-09-30 22:14:17.878835', 'step': 1808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:17.911026', 'step': 1808, 'epoch': 1} {'type': 'loss', 'content': 0.2344961166381836, 'timestamp': '2025-09-30 22:14:17.920760', 'step': 1809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:14:17.956835', 'step': 1809, 'epoch': 1} {'type': 'loss', 'content': 0.17750310897827148, 'timestamp': '2025-09-30 22:14:17.962801', 'step': 1810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:17.999588', 'step': 1810, 'epoch': 1} {'type': 'loss', 'content': 0.15139120817184448, 'timestamp': '2025-09-30 22:14:18.004517', 'step': 1811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.038217', 'step': 1811, 'epoch': 1} {'type': 'loss', 'content': 0.2104691118001938, 'timestamp': '2025-09-30 22:14:18.065803', 'step': 1812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:18.101576', 'step': 1812, 'epoch': 1} {'type': 'loss', 'content': 0.14764024317264557, 'timestamp': '2025-09-30 22:14:18.110236', 'step': 1813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:14:18.147836', 'step': 1813, 'epoch': 1} {'type': 'loss', 'content': 0.1356777548789978, 'timestamp': '2025-09-30 22:14:18.152535', 'step': 1814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.186530', 'step': 1814, 'epoch': 1} {'type': 'loss', 'content': 0.16337241232395172, 'timestamp': '2025-09-30 22:14:18.192130', 'step': 1815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.226009', 'step': 1815, 'epoch': 1} {'type': 'loss', 'content': 0.23343437910079956, 'timestamp': '2025-09-30 22:14:18.251745', 'step': 1816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.284158', 'step': 1816, 'epoch': 1} {'type': 'loss', 'content': 0.1538107693195343, 'timestamp': '2025-09-30 22:14:18.289391', 'step': 1817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.322445', 'step': 1817, 'epoch': 1} {'type': 'loss', 'content': 0.09204621613025665, 'timestamp': '2025-09-30 22:14:18.327365', 'step': 1818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.373159', 'step': 1818, 'epoch': 1} {'type': 'loss', 'content': 0.16012810170650482, 'timestamp': '2025-09-30 22:14:18.375786', 'step': 1819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.419351', 'step': 1819, 'epoch': 1} {'type': 'loss', 'content': 0.20844195783138275, 'timestamp': '2025-09-30 22:14:18.444352', 'step': 1820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.483071', 'step': 1820, 'epoch': 1} {'type': 'loss', 'content': 0.13739918172359467, 'timestamp': '2025-09-30 22:14:18.490232', 'step': 1821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.526961', 'step': 1821, 'epoch': 1} {'type': 'loss', 'content': 0.14872843027114868, 'timestamp': '2025-09-30 22:14:18.536110', 'step': 1822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:18.574593', 'step': 1822, 'epoch': 1} {'type': 'loss', 'content': 0.13809210062026978, 'timestamp': '2025-09-30 22:14:18.581808', 'step': 1823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:18.618065', 'step': 1823, 'epoch': 1} {'type': 'loss', 'content': 0.1375306248664856, 'timestamp': '2025-09-30 22:14:18.647757', 'step': 1824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:18.680054', 'step': 1824, 'epoch': 1} {'type': 'loss', 'content': 0.2170713096857071, 'timestamp': '2025-09-30 22:14:18.685897', 'step': 1825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.722170', 'step': 1825, 'epoch': 1} {'type': 'loss', 'content': 0.15892203152179718, 'timestamp': '2025-09-30 22:14:18.725118', 'step': 1826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.755282', 'step': 1826, 'epoch': 1} {'type': 'loss', 'content': 0.1871659904718399, 'timestamp': '2025-09-30 22:14:18.760226', 'step': 1827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.791173', 'step': 1827, 'epoch': 1} {'type': 'loss', 'content': 0.10220678150653839, 'timestamp': '2025-09-30 22:14:18.816504', 'step': 1828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.849351', 'step': 1828, 'epoch': 1} {'type': 'loss', 'content': 0.1597013771533966, 'timestamp': '2025-09-30 22:14:18.854753', 'step': 1829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.887297', 'step': 1829, 'epoch': 1} {'type': 'loss', 'content': 0.06194636970758438, 'timestamp': '2025-09-30 22:14:18.891644', 'step': 1830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:18.923798', 'step': 1830, 'epoch': 1} {'type': 'loss', 'content': 0.19365324079990387, 'timestamp': '2025-09-30 22:14:18.926164', 'step': 1831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:18.958088', 'step': 1831, 'epoch': 1} {'type': 'loss', 'content': 0.1655263453722, 'timestamp': '2025-09-30 22:14:18.983465', 'step': 1832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:19.015553', 'step': 1832, 'epoch': 1} {'type': 'loss', 'content': 0.2065235823392868, 'timestamp': '2025-09-30 22:14:19.019615', 'step': 1833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:19.051637', 'step': 1833, 'epoch': 1} {'type': 'loss', 'content': 0.2244095504283905, 'timestamp': '2025-09-30 22:14:19.055839', 'step': 1834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:19.088803', 'step': 1834, 'epoch': 1} {'type': 'loss', 'content': 0.09292618930339813, 'timestamp': '2025-09-30 22:14:19.092424', 'step': 1835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.134377', 'step': 1835, 'epoch': 1} {'type': 'loss', 'content': 0.2043037712574005, 'timestamp': '2025-09-30 22:14:19.158721', 'step': 1836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:19.190231', 'step': 1836, 'epoch': 1} {'type': 'loss', 'content': 0.10885177552700043, 'timestamp': '2025-09-30 22:14:19.194300', 'step': 1837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:19.226316', 'step': 1837, 'epoch': 1} {'type': 'loss', 'content': 0.22679546475410461, 'timestamp': '2025-09-30 22:14:19.232425', 'step': 1838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.266492', 'step': 1838, 'epoch': 1} {'type': 'loss', 'content': 0.17965663969516754, 'timestamp': '2025-09-30 22:14:19.270600', 'step': 1839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.300764', 'step': 1839, 'epoch': 1} {'type': 'loss', 'content': 0.23291808366775513, 'timestamp': '2025-09-30 22:14:19.326894', 'step': 1840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.358981', 'step': 1840, 'epoch': 1} {'type': 'loss', 'content': 0.08486145734786987, 'timestamp': '2025-09-30 22:14:19.363711', 'step': 1841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.396540', 'step': 1841, 'epoch': 1} {'type': 'loss', 'content': 0.14597956836223602, 'timestamp': '2025-09-30 22:14:19.398923', 'step': 1842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:19.433903', 'step': 1842, 'epoch': 1} {'type': 'loss', 'content': 0.15724192559719086, 'timestamp': '2025-09-30 22:14:19.439767', 'step': 1843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.473284', 'step': 1843, 'epoch': 1} {'type': 'loss', 'content': 0.16680099070072174, 'timestamp': '2025-09-30 22:14:19.498830', 'step': 1844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.528851', 'step': 1844, 'epoch': 1} {'type': 'loss', 'content': 0.15061302483081818, 'timestamp': '2025-09-30 22:14:19.531888', 'step': 1845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.565467', 'step': 1845, 'epoch': 1} {'type': 'loss', 'content': 0.20442822575569153, 'timestamp': '2025-09-30 22:14:19.569273', 'step': 1846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:19.601084', 'step': 1846, 'epoch': 1} {'type': 'loss', 'content': 0.14800690114498138, 'timestamp': '2025-09-30 22:14:19.605055', 'step': 1847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.636843', 'step': 1847, 'epoch': 1} {'type': 'loss', 'content': 0.16589002311229706, 'timestamp': '2025-09-30 22:14:19.661634', 'step': 1848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.693252', 'step': 1848, 'epoch': 1} {'type': 'loss', 'content': 0.20280633866786957, 'timestamp': '2025-09-30 22:14:19.697762', 'step': 1849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.729405', 'step': 1849, 'epoch': 1} {'type': 'loss', 'content': 0.1495167464017868, 'timestamp': '2025-09-30 22:14:19.733720', 'step': 1850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.766095', 'step': 1850, 'epoch': 1} {'type': 'loss', 'content': 0.16982603073120117, 'timestamp': '2025-09-30 22:14:19.770405', 'step': 1851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.803496', 'step': 1851, 'epoch': 1} {'type': 'loss', 'content': 0.1944146454334259, 'timestamp': '2025-09-30 22:14:19.828870', 'step': 1852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.863486', 'step': 1852, 'epoch': 1} {'type': 'loss', 'content': 0.22974689304828644, 'timestamp': '2025-09-30 22:14:19.869791', 'step': 1853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:19.900564', 'step': 1853, 'epoch': 1} {'type': 'loss', 'content': 0.11699691414833069, 'timestamp': '2025-09-30 22:14:19.904407', 'step': 1854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.938982', 'step': 1854, 'epoch': 1} {'type': 'loss', 'content': 0.15469422936439514, 'timestamp': '2025-09-30 22:14:19.944128', 'step': 1855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:19.976885', 'step': 1855, 'epoch': 1} {'type': 'loss', 'content': 0.1280580759048462, 'timestamp': '2025-09-30 22:14:20.002949', 'step': 1856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:20.034603', 'step': 1856, 'epoch': 1} {'type': 'loss', 'content': 0.1572801023721695, 'timestamp': '2025-09-30 22:14:20.037642', 'step': 1857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:20.068935', 'step': 1857, 'epoch': 1} {'type': 'loss', 'content': 0.07237700372934341, 'timestamp': '2025-09-30 22:14:20.072712', 'step': 1858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:20.103060', 'step': 1858, 'epoch': 1} {'type': 'loss', 'content': 0.1011275202035904, 'timestamp': '2025-09-30 22:14:20.105434', 'step': 1859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:20.136966', 'step': 1859, 'epoch': 1} {'type': 'loss', 'content': 0.14233119785785675, 'timestamp': '2025-09-30 22:14:20.161193', 'step': 1860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:20.191538', 'step': 1860, 'epoch': 1} {'type': 'loss', 'content': 0.14155472815036774, 'timestamp': '2025-09-30 22:14:20.194976', 'step': 1861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:20.227112', 'step': 1861, 'epoch': 1} {'type': 'loss', 'content': 0.1860140860080719, 'timestamp': '2025-09-30 22:14:20.231232', 'step': 1862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:20.263161', 'step': 1862, 'epoch': 1} {'type': 'loss', 'content': 0.17608579993247986, 'timestamp': '2025-09-30 22:14:20.266240', 'step': 1863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:20.299087', 'step': 1863, 'epoch': 1} {'type': 'loss', 'content': 0.32090017199516296, 'timestamp': '2025-09-30 22:14:20.322828', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:14:28.071439', 'step': 1864, 'epoch': 1} {'type': 'pplx', 'content': 8512.965266283367, 'timestamp': '2025-09-30 22:14:28.073894', 'step': 1864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.102419', 'step': 1864, 'epoch': 1} {'type': 'loss', 'content': 0.1774810552597046, 'timestamp': '2025-09-30 22:14:28.104910', 'step': 1865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.135324', 'step': 1865, 'epoch': 1} {'type': 'loss', 'content': 0.2231571078300476, 'timestamp': '2025-09-30 22:14:28.137460', 'step': 1866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.167638', 'step': 1866, 'epoch': 1} {'type': 'loss', 'content': 0.08972158282995224, 'timestamp': '2025-09-30 22:14:28.169612', 'step': 1867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.200701', 'step': 1867, 'epoch': 1} {'type': 'loss', 'content': 0.2577265799045563, 'timestamp': '2025-09-30 22:14:28.225105', 'step': 1868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.256983', 'step': 1868, 'epoch': 1} {'type': 'loss', 'content': 0.12104971706867218, 'timestamp': '2025-09-30 22:14:28.259414', 'step': 1869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.290888', 'step': 1869, 'epoch': 1} {'type': 'loss', 'content': 0.14788460731506348, 'timestamp': '2025-09-30 22:14:28.293181', 'step': 1870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.323303', 'step': 1870, 'epoch': 1} {'type': 'loss', 'content': 0.21722282469272614, 'timestamp': '2025-09-30 22:14:28.325433', 'step': 1871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.356573', 'step': 1871, 'epoch': 1} {'type': 'loss', 'content': 0.1898142546415329, 'timestamp': '2025-09-30 22:14:28.380640', 'step': 1872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.411009', 'step': 1872, 'epoch': 1} {'type': 'loss', 'content': 0.21154673397541046, 'timestamp': '2025-09-30 22:14:28.413124', 'step': 1873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:28.443411', 'step': 1873, 'epoch': 1} {'type': 'loss', 'content': 0.13285119831562042, 'timestamp': '2025-09-30 22:14:28.445549', 'step': 1874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.476513', 'step': 1874, 'epoch': 1} {'type': 'loss', 'content': 0.17383593320846558, 'timestamp': '2025-09-30 22:14:28.478705', 'step': 1875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.510579', 'step': 1875, 'epoch': 1} {'type': 'loss', 'content': 0.1264616996049881, 'timestamp': '2025-09-30 22:14:28.534350', 'step': 1876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:28.565163', 'step': 1876, 'epoch': 1} {'type': 'loss', 'content': 0.22580623626708984, 'timestamp': '2025-09-30 22:14:28.567552', 'step': 1877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.598277', 'step': 1877, 'epoch': 1} {'type': 'loss', 'content': 0.13581867516040802, 'timestamp': '2025-09-30 22:14:28.600394', 'step': 1878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.631800', 'step': 1878, 'epoch': 1} {'type': 'loss', 'content': 0.12361860275268555, 'timestamp': '2025-09-30 22:14:28.633873', 'step': 1879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.665733', 'step': 1879, 'epoch': 1} {'type': 'loss', 'content': 0.1993483304977417, 'timestamp': '2025-09-30 22:14:28.690183', 'step': 1880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.720951', 'step': 1880, 'epoch': 1} {'type': 'loss', 'content': 0.21094754338264465, 'timestamp': '2025-09-30 22:14:28.723024', 'step': 1881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.762381', 'step': 1881, 'epoch': 1} {'type': 'loss', 'content': 0.10332950949668884, 'timestamp': '2025-09-30 22:14:28.764580', 'step': 1882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.794856', 'step': 1882, 'epoch': 1} {'type': 'loss', 'content': 0.1915806233882904, 'timestamp': '2025-09-30 22:14:28.797829', 'step': 1883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.828710', 'step': 1883, 'epoch': 1} {'type': 'loss', 'content': 0.17288124561309814, 'timestamp': '2025-09-30 22:14:28.852866', 'step': 1884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:28.883671', 'step': 1884, 'epoch': 1} {'type': 'loss', 'content': 0.1252836138010025, 'timestamp': '2025-09-30 22:14:28.885836', 'step': 1885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:28.916007', 'step': 1885, 'epoch': 1} {'type': 'loss', 'content': 0.13872593641281128, 'timestamp': '2025-09-30 22:14:28.918155', 'step': 1886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:28.948742', 'step': 1886, 'epoch': 1} {'type': 'loss', 'content': 0.1644255667924881, 'timestamp': '2025-09-30 22:14:28.950986', 'step': 1887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:28.990805', 'step': 1887, 'epoch': 1} {'type': 'loss', 'content': 0.21441394090652466, 'timestamp': '2025-09-30 22:14:29.014294', 'step': 1888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:29.044949', 'step': 1888, 'epoch': 1} {'type': 'loss', 'content': 0.12408830970525742, 'timestamp': '2025-09-30 22:14:29.050045', 'step': 1889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.080254', 'step': 1889, 'epoch': 1} {'type': 'loss', 'content': 0.2367427945137024, 'timestamp': '2025-09-30 22:14:29.082719', 'step': 1890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:29.113163', 'step': 1890, 'epoch': 1} {'type': 'loss', 'content': 0.1829410344362259, 'timestamp': '2025-09-30 22:14:29.116676', 'step': 1891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.147997', 'step': 1891, 'epoch': 1} {'type': 'loss', 'content': 0.22822566330432892, 'timestamp': '2025-09-30 22:14:29.171720', 'step': 1892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.204011', 'step': 1892, 'epoch': 1} {'type': 'loss', 'content': 0.1264144778251648, 'timestamp': '2025-09-30 22:14:29.208239', 'step': 1893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:29.241055', 'step': 1893, 'epoch': 1} {'type': 'loss', 'content': 0.1883498728275299, 'timestamp': '2025-09-30 22:14:29.243636', 'step': 1894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:29.273948', 'step': 1894, 'epoch': 1} {'type': 'loss', 'content': 0.16337086260318756, 'timestamp': '2025-09-30 22:14:29.280052', 'step': 1895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.313327', 'step': 1895, 'epoch': 1} {'type': 'loss', 'content': 0.2224908173084259, 'timestamp': '2025-09-30 22:14:29.337925', 'step': 1896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:29.368281', 'step': 1896, 'epoch': 1} {'type': 'loss', 'content': 0.15966972708702087, 'timestamp': '2025-09-30 22:14:29.371144', 'step': 1897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:29.402065', 'step': 1897, 'epoch': 1} {'type': 'loss', 'content': 0.11548406630754471, 'timestamp': '2025-09-30 22:14:29.404335', 'step': 1898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.437324', 'step': 1898, 'epoch': 1} {'type': 'loss', 'content': 0.13468432426452637, 'timestamp': '2025-09-30 22:14:29.440010', 'step': 1899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:29.473148', 'step': 1899, 'epoch': 1} {'type': 'loss', 'content': 0.18565474450588226, 'timestamp': '2025-09-30 22:14:29.498173', 'step': 1900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.531195', 'step': 1900, 'epoch': 1} {'type': 'loss', 'content': 0.23216275870800018, 'timestamp': '2025-09-30 22:14:29.535343', 'step': 1901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.567821', 'step': 1901, 'epoch': 1} {'type': 'loss', 'content': 0.29318001866340637, 'timestamp': '2025-09-30 22:14:29.570240', 'step': 1902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.601129', 'step': 1902, 'epoch': 1} {'type': 'loss', 'content': 0.2447071671485901, 'timestamp': '2025-09-30 22:14:29.603761', 'step': 1903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:29.633957', 'step': 1903, 'epoch': 1} {'type': 'loss', 'content': 0.2682119905948639, 'timestamp': '2025-09-30 22:14:29.657977', 'step': 1904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.688888', 'step': 1904, 'epoch': 1} {'type': 'loss', 'content': 0.1886042207479477, 'timestamp': '2025-09-30 22:14:29.694631', 'step': 1905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.726610', 'step': 1905, 'epoch': 1} {'type': 'loss', 'content': 0.2213820070028305, 'timestamp': '2025-09-30 22:14:29.729264', 'step': 1906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.761708', 'step': 1906, 'epoch': 1} {'type': 'loss', 'content': 0.19161605834960938, 'timestamp': '2025-09-30 22:14:29.763914', 'step': 1907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.797566', 'step': 1907, 'epoch': 1} {'type': 'loss', 'content': 0.2347199022769928, 'timestamp': '2025-09-30 22:14:29.821442', 'step': 1908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:29.852703', 'step': 1908, 'epoch': 1} {'type': 'loss', 'content': 0.18414615094661713, 'timestamp': '2025-09-30 22:14:29.855456', 'step': 1909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:29.886832', 'step': 1909, 'epoch': 1} {'type': 'loss', 'content': 0.19611598551273346, 'timestamp': '2025-09-30 22:14:29.889273', 'step': 1910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.920140', 'step': 1910, 'epoch': 1} {'type': 'loss', 'content': 0.16961459815502167, 'timestamp': '2025-09-30 22:14:29.922240', 'step': 1911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:29.952802', 'step': 1911, 'epoch': 1} {'type': 'loss', 'content': 0.23628954589366913, 'timestamp': '2025-09-30 22:14:29.978986', 'step': 1912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.009052', 'step': 1912, 'epoch': 1} {'type': 'loss', 'content': 0.14082005620002747, 'timestamp': '2025-09-30 22:14:30.011910', 'step': 1913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.042334', 'step': 1913, 'epoch': 1} {'type': 'loss', 'content': 0.1834263950586319, 'timestamp': '2025-09-30 22:14:30.045012', 'step': 1914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:30.075744', 'step': 1914, 'epoch': 1} {'type': 'loss', 'content': 0.27346283197402954, 'timestamp': '2025-09-30 22:14:30.078528', 'step': 1915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:30.109728', 'step': 1915, 'epoch': 1} {'type': 'loss', 'content': 0.15692509710788727, 'timestamp': '2025-09-30 22:14:30.134028', 'step': 1916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.168479', 'step': 1916, 'epoch': 1} {'type': 'loss', 'content': 0.15178729593753815, 'timestamp': '2025-09-30 22:14:30.170919', 'step': 1917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:30.201839', 'step': 1917, 'epoch': 1} {'type': 'loss', 'content': 0.17095768451690674, 'timestamp': '2025-09-30 22:14:30.204800', 'step': 1918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.234980', 'step': 1918, 'epoch': 1} {'type': 'loss', 'content': 0.16219763457775116, 'timestamp': '2025-09-30 22:14:30.238248', 'step': 1919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.268975', 'step': 1919, 'epoch': 1} {'type': 'loss', 'content': 0.18878944218158722, 'timestamp': '2025-09-30 22:14:30.292788', 'step': 1920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.322825', 'step': 1920, 'epoch': 1} {'type': 'loss', 'content': 0.22739441692829132, 'timestamp': '2025-09-30 22:14:30.324870', 'step': 1921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.356134', 'step': 1921, 'epoch': 1} {'type': 'loss', 'content': 0.14289718866348267, 'timestamp': '2025-09-30 22:14:30.359291', 'step': 1922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.390442', 'step': 1922, 'epoch': 1} {'type': 'loss', 'content': 0.15494230389595032, 'timestamp': '2025-09-30 22:14:30.394017', 'step': 1923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:30.425079', 'step': 1923, 'epoch': 1} {'type': 'loss', 'content': 0.2103584110736847, 'timestamp': '2025-09-30 22:14:30.449384', 'step': 1924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.482739', 'step': 1924, 'epoch': 1} {'type': 'loss', 'content': 0.22317011654376984, 'timestamp': '2025-09-30 22:14:30.485593', 'step': 1925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:30.516837', 'step': 1925, 'epoch': 1} {'type': 'loss', 'content': 0.23085226118564606, 'timestamp': '2025-09-30 22:14:30.519132', 'step': 1926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:30.550696', 'step': 1926, 'epoch': 1} {'type': 'loss', 'content': 0.11714915186166763, 'timestamp': '2025-09-30 22:14:30.554046', 'step': 1927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.590865', 'step': 1927, 'epoch': 1} {'type': 'loss', 'content': 0.24138692021369934, 'timestamp': '2025-09-30 22:14:30.616354', 'step': 1928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.648305', 'step': 1928, 'epoch': 1} {'type': 'loss', 'content': 0.19596058130264282, 'timestamp': '2025-09-30 22:14:30.652007', 'step': 1929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.682833', 'step': 1929, 'epoch': 1} {'type': 'loss', 'content': 0.1877022683620453, 'timestamp': '2025-09-30 22:14:30.685898', 'step': 1930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:30.717440', 'step': 1930, 'epoch': 1} {'type': 'loss', 'content': 0.19139674305915833, 'timestamp': '2025-09-30 22:14:30.719569', 'step': 1931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.751497', 'step': 1931, 'epoch': 1} {'type': 'loss', 'content': 0.12918603420257568, 'timestamp': '2025-09-30 22:14:30.776007', 'step': 1932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:30.807078', 'step': 1932, 'epoch': 1} {'type': 'loss', 'content': 0.22612126171588898, 'timestamp': '2025-09-30 22:14:30.809611', 'step': 1933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:30.839981', 'step': 1933, 'epoch': 1} {'type': 'loss', 'content': 0.17531120777130127, 'timestamp': '2025-09-30 22:14:30.842231', 'step': 1934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:30.879578', 'step': 1934, 'epoch': 1} {'type': 'loss', 'content': 0.12119919806718826, 'timestamp': '2025-09-30 22:14:30.882341', 'step': 1935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:30.915150', 'step': 1935, 'epoch': 1} {'type': 'loss', 'content': 0.21608571708202362, 'timestamp': '2025-09-30 22:14:30.939822', 'step': 1936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:30.973141', 'step': 1936, 'epoch': 1} {'type': 'loss', 'content': 0.15113312005996704, 'timestamp': '2025-09-30 22:14:30.978895', 'step': 1937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.012246', 'step': 1937, 'epoch': 1} {'type': 'loss', 'content': 0.24904929101467133, 'timestamp': '2025-09-30 22:14:31.018056', 'step': 1938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.051808', 'step': 1938, 'epoch': 1} {'type': 'loss', 'content': 0.20303863286972046, 'timestamp': '2025-09-30 22:14:31.057357', 'step': 1939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.089543', 'step': 1939, 'epoch': 1} {'type': 'loss', 'content': 0.1681198626756668, 'timestamp': '2025-09-30 22:14:31.113345', 'step': 1940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.149694', 'step': 1940, 'epoch': 1} {'type': 'loss', 'content': 0.27005064487457275, 'timestamp': '2025-09-30 22:14:31.156840', 'step': 1941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.193325', 'step': 1941, 'epoch': 1} {'type': 'loss', 'content': 0.1894511878490448, 'timestamp': '2025-09-30 22:14:31.195832', 'step': 1942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.227421', 'step': 1942, 'epoch': 1} {'type': 'loss', 'content': 0.1441550850868225, 'timestamp': '2025-09-30 22:14:31.230636', 'step': 1943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.260875', 'step': 1943, 'epoch': 1} {'type': 'loss', 'content': 0.22415314614772797, 'timestamp': '2025-09-30 22:14:31.289256', 'step': 1944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.319768', 'step': 1944, 'epoch': 1} {'type': 'loss', 'content': 0.2691539227962494, 'timestamp': '2025-09-30 22:14:31.322837', 'step': 1945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.353364', 'step': 1945, 'epoch': 1} {'type': 'loss', 'content': 0.1388077586889267, 'timestamp': '2025-09-30 22:14:31.359290', 'step': 1946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.393367', 'step': 1946, 'epoch': 1} {'type': 'loss', 'content': 0.18916594982147217, 'timestamp': '2025-09-30 22:14:31.397613', 'step': 1947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.440643', 'step': 1947, 'epoch': 1} {'type': 'loss', 'content': 0.1713588684797287, 'timestamp': '2025-09-30 22:14:31.465357', 'step': 1948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.496456', 'step': 1948, 'epoch': 1} {'type': 'loss', 'content': 0.32265645265579224, 'timestamp': '2025-09-30 22:14:31.509334', 'step': 1949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.543330', 'step': 1949, 'epoch': 1} {'type': 'loss', 'content': 0.18985211849212646, 'timestamp': '2025-09-30 22:14:31.545981', 'step': 1950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.576509', 'step': 1950, 'epoch': 1} {'type': 'loss', 'content': 0.27624422311782837, 'timestamp': '2025-09-30 22:14:31.581909', 'step': 1951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.615721', 'step': 1951, 'epoch': 1} {'type': 'loss', 'content': 0.13463325798511505, 'timestamp': '2025-09-30 22:14:31.642052', 'step': 1952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:31.674108', 'step': 1952, 'epoch': 1} {'type': 'loss', 'content': 0.174066424369812, 'timestamp': '2025-09-30 22:14:31.680493', 'step': 1953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.717277', 'step': 1953, 'epoch': 1} {'type': 'loss', 'content': 0.11579969525337219, 'timestamp': '2025-09-30 22:14:31.719677', 'step': 1954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:31.753999', 'step': 1954, 'epoch': 1} {'type': 'loss', 'content': 0.140971839427948, 'timestamp': '2025-09-30 22:14:31.760055', 'step': 1955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.792697', 'step': 1955, 'epoch': 1} {'type': 'loss', 'content': 0.15516406297683716, 'timestamp': '2025-09-30 22:14:31.820276', 'step': 1956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.855588', 'step': 1956, 'epoch': 1} {'type': 'loss', 'content': 0.22176766395568848, 'timestamp': '2025-09-30 22:14:31.861044', 'step': 1957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:31.894899', 'step': 1957, 'epoch': 1} {'type': 'loss', 'content': 0.19409342110157013, 'timestamp': '2025-09-30 22:14:31.898708', 'step': 1958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.935723', 'step': 1958, 'epoch': 1} {'type': 'loss', 'content': 0.17463736236095428, 'timestamp': '2025-09-30 22:14:31.942042', 'step': 1959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:31.972946', 'step': 1959, 'epoch': 1} {'type': 'loss', 'content': 0.18481074273586273, 'timestamp': '2025-09-30 22:14:31.998095', 'step': 1960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:32.029486', 'step': 1960, 'epoch': 1} {'type': 'loss', 'content': 0.14646394550800323, 'timestamp': '2025-09-30 22:14:32.032007', 'step': 1961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:32.064992', 'step': 1961, 'epoch': 1} {'type': 'loss', 'content': 0.14284861087799072, 'timestamp': '2025-09-30 22:14:32.067334', 'step': 1962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:32.098452', 'step': 1962, 'epoch': 1} {'type': 'loss', 'content': 0.1204027459025383, 'timestamp': '2025-09-30 22:14:32.101749', 'step': 1963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:32.137105', 'step': 1963, 'epoch': 1} {'type': 'loss', 'content': 0.12350573390722275, 'timestamp': '2025-09-30 22:14:32.166029', 'step': 1964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:32.197560', 'step': 1964, 'epoch': 1} {'type': 'loss', 'content': 0.1614663153886795, 'timestamp': '2025-09-30 22:14:32.203365', 'step': 1965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:32.234763', 'step': 1965, 'epoch': 1} {'type': 'loss', 'content': 0.1251215934753418, 'timestamp': '2025-09-30 22:14:32.237122', 'step': 1966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:32.272915', 'step': 1966, 'epoch': 1} {'type': 'loss', 'content': 0.35771793127059937, 'timestamp': '2025-09-30 22:14:32.278958', 'step': 1967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:32.321633', 'step': 1967, 'epoch': 1} {'type': 'loss', 'content': 0.14337892830371857, 'timestamp': '2025-09-30 22:14:32.347360', 'step': 1968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:32.412458', 'step': 1968, 'epoch': 1} {'type': 'loss', 'content': 0.16249176859855652, 'timestamp': '2025-09-30 22:14:32.418133', 'step': 1969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:32.453872', 'step': 1969, 'epoch': 1} {'type': 'loss', 'content': 0.14609390497207642, 'timestamp': '2025-09-30 22:14:32.456871', 'step': 1970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:32.489088', 'step': 1970, 'epoch': 1} {'type': 'loss', 'content': 0.2556743323802948, 'timestamp': '2025-09-30 22:14:32.499336', 'step': 1971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:32.535070', 'step': 1971, 'epoch': 1} {'type': 'loss', 'content': 0.12186496704816818, 'timestamp': '2025-09-30 22:14:32.559269', 'step': 1972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:32.594679', 'step': 1972, 'epoch': 1} {'type': 'loss', 'content': 0.1491652876138687, 'timestamp': '2025-09-30 22:14:32.599141', 'step': 1973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:32.631765', 'step': 1973, 'epoch': 1} {'type': 'loss', 'content': 0.20647984743118286, 'timestamp': '2025-09-30 22:14:32.634106', 'step': 1974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:32.665687', 'step': 1974, 'epoch': 1} {'type': 'loss', 'content': 0.1616879254579544, 'timestamp': '2025-09-30 22:14:32.667762', 'step': 1975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:32.700718', 'step': 1975, 'epoch': 1} {'type': 'loss', 'content': 0.15572789311408997, 'timestamp': '2025-09-30 22:14:32.726933', 'step': 1976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:32.762182', 'step': 1976, 'epoch': 1} {'type': 'loss', 'content': 0.17972451448440552, 'timestamp': '2025-09-30 22:14:32.768200', 'step': 1977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:32.803102', 'step': 1977, 'epoch': 1} {'type': 'loss', 'content': 0.18975625932216644, 'timestamp': '2025-09-30 22:14:32.811053', 'step': 1978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:32.852900', 'step': 1978, 'epoch': 1} {'type': 'loss', 'content': 0.268254816532135, 'timestamp': '2025-09-30 22:14:32.863801', 'step': 1979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:32.899115', 'step': 1979, 'epoch': 1} {'type': 'loss', 'content': 0.1465868353843689, 'timestamp': '2025-09-30 22:14:32.933324', 'step': 1980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:32.967473', 'step': 1980, 'epoch': 1} {'type': 'loss', 'content': 0.18947511911392212, 'timestamp': '2025-09-30 22:14:32.971687', 'step': 1981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:33.011164', 'step': 1981, 'epoch': 1} {'type': 'loss', 'content': 0.2594533860683441, 'timestamp': '2025-09-30 22:14:33.020609', 'step': 1982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:33.053056', 'step': 1982, 'epoch': 1} {'type': 'loss', 'content': 0.20795860886573792, 'timestamp': '2025-09-30 22:14:33.057115', 'step': 1983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:33.096406', 'step': 1983, 'epoch': 1} {'type': 'loss', 'content': 0.24544410407543182, 'timestamp': '2025-09-30 22:14:33.121549', 'step': 1984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.152524', 'step': 1984, 'epoch': 1} {'type': 'loss', 'content': 0.14127762615680695, 'timestamp': '2025-09-30 22:14:33.157746', 'step': 1985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:33.193605', 'step': 1985, 'epoch': 1} {'type': 'loss', 'content': 0.13965870440006256, 'timestamp': '2025-09-30 22:14:33.196889', 'step': 1986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.228613', 'step': 1986, 'epoch': 1} {'type': 'loss', 'content': 0.2460656613111496, 'timestamp': '2025-09-30 22:14:33.232229', 'step': 1987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.271588', 'step': 1987, 'epoch': 1} {'type': 'loss', 'content': 0.2088690996170044, 'timestamp': '2025-09-30 22:14:33.296735', 'step': 1988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:33.329619', 'step': 1988, 'epoch': 1} {'type': 'loss', 'content': 0.26663652062416077, 'timestamp': '2025-09-30 22:14:33.331917', 'step': 1989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:33.365470', 'step': 1989, 'epoch': 1} {'type': 'loss', 'content': 0.14392423629760742, 'timestamp': '2025-09-30 22:14:33.373021', 'step': 1990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:33.409070', 'step': 1990, 'epoch': 1} {'type': 'loss', 'content': 0.16553430259227753, 'timestamp': '2025-09-30 22:14:33.412454', 'step': 1991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.453179', 'step': 1991, 'epoch': 1} {'type': 'loss', 'content': 0.1622607260942459, 'timestamp': '2025-09-30 22:14:33.485438', 'step': 1992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.526929', 'step': 1992, 'epoch': 1} {'type': 'loss', 'content': 0.17446859180927277, 'timestamp': '2025-09-30 22:14:33.531341', 'step': 1993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:33.567520', 'step': 1993, 'epoch': 1} {'type': 'loss', 'content': 0.19372424483299255, 'timestamp': '2025-09-30 22:14:33.570816', 'step': 1994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.611269', 'step': 1994, 'epoch': 1} {'type': 'loss', 'content': 0.22241000831127167, 'timestamp': '2025-09-30 22:14:33.613452', 'step': 1995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:33.655028', 'step': 1995, 'epoch': 1} {'type': 'loss', 'content': 0.1693042665719986, 'timestamp': '2025-09-30 22:14:33.680271', 'step': 1996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:33.723163', 'step': 1996, 'epoch': 1} {'type': 'loss', 'content': 0.21129487454891205, 'timestamp': '2025-09-30 22:14:33.725496', 'step': 1997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:33.756707', 'step': 1997, 'epoch': 1} {'type': 'loss', 'content': 0.25757741928100586, 'timestamp': '2025-09-30 22:14:33.759390', 'step': 1998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:33.799269', 'step': 1998, 'epoch': 1} {'type': 'loss', 'content': 0.15207895636558533, 'timestamp': '2025-09-30 22:14:33.810721', 'step': 1999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:33.860746', 'step': 1999, 'epoch': 1} {'type': 'loss', 'content': 0.16389577090740204, 'timestamp': '2025-09-30 22:14:33.895756', 'step': 2000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2000', 'timestamp': '2025-09-30 22:14:39.183662', 'step': 2000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.223893', 'step': 2000, 'epoch': 1} {'type': 'loss', 'content': 0.1440349817276001, 'timestamp': '2025-09-30 22:14:39.231062', 'step': 2001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:39.266524', 'step': 2001, 'epoch': 1} {'type': 'loss', 'content': 0.24145562946796417, 'timestamp': '2025-09-30 22:14:39.269343', 'step': 2002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.302799', 'step': 2002, 'epoch': 1} {'type': 'loss', 'content': 0.18705326318740845, 'timestamp': '2025-09-30 22:14:39.305342', 'step': 2003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:39.336149', 'step': 2003, 'epoch': 1} {'type': 'loss', 'content': 0.21027745306491852, 'timestamp': '2025-09-30 22:14:39.360354', 'step': 2004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:39.395559', 'step': 2004, 'epoch': 1} {'type': 'loss', 'content': 0.1324654221534729, 'timestamp': '2025-09-30 22:14:39.397613', 'step': 2005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.428291', 'step': 2005, 'epoch': 1} {'type': 'loss', 'content': 0.14427757263183594, 'timestamp': '2025-09-30 22:14:39.430444', 'step': 2006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:39.462664', 'step': 2006, 'epoch': 1} {'type': 'loss', 'content': 0.23931945860385895, 'timestamp': '2025-09-30 22:14:39.465056', 'step': 2007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:39.497075', 'step': 2007, 'epoch': 1} {'type': 'loss', 'content': 0.20989461243152618, 'timestamp': '2025-09-30 22:14:39.520662', 'step': 2008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.550957', 'step': 2008, 'epoch': 1} {'type': 'loss', 'content': 0.11813152581453323, 'timestamp': '2025-09-30 22:14:39.552964', 'step': 2009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.583844', 'step': 2009, 'epoch': 1} {'type': 'loss', 'content': 0.19666104018688202, 'timestamp': '2025-09-30 22:14:39.586043', 'step': 2010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:39.616986', 'step': 2010, 'epoch': 1} {'type': 'loss', 'content': 0.1174665242433548, 'timestamp': '2025-09-30 22:14:39.620243', 'step': 2011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.652662', 'step': 2011, 'epoch': 1} {'type': 'loss', 'content': 0.1683882176876068, 'timestamp': '2025-09-30 22:14:39.680391', 'step': 2012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:39.712112', 'step': 2012, 'epoch': 1} {'type': 'loss', 'content': 0.31559067964553833, 'timestamp': '2025-09-30 22:14:39.718496', 'step': 2013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:39.749473', 'step': 2013, 'epoch': 1} {'type': 'loss', 'content': 0.22977913916110992, 'timestamp': '2025-09-30 22:14:39.752054', 'step': 2014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:39.782381', 'step': 2014, 'epoch': 1} {'type': 'loss', 'content': 0.13508929312229156, 'timestamp': '2025-09-30 22:14:39.785140', 'step': 2015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:39.816694', 'step': 2015, 'epoch': 1} {'type': 'loss', 'content': 0.09720215946435928, 'timestamp': '2025-09-30 22:14:39.840396', 'step': 2016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:39.870857', 'step': 2016, 'epoch': 1} {'type': 'loss', 'content': 0.11349402368068695, 'timestamp': '2025-09-30 22:14:39.873295', 'step': 2017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:39.903881', 'step': 2017, 'epoch': 1} {'type': 'loss', 'content': 0.18731753528118134, 'timestamp': '2025-09-30 22:14:39.906011', 'step': 2018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:39.936828', 'step': 2018, 'epoch': 1} {'type': 'loss', 'content': 0.15968936681747437, 'timestamp': '2025-09-30 22:14:39.940788', 'step': 2019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:39.972790', 'step': 2019, 'epoch': 1} {'type': 'loss', 'content': 0.28141435980796814, 'timestamp': '2025-09-30 22:14:39.997455', 'step': 2020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.028191', 'step': 2020, 'epoch': 1} {'type': 'loss', 'content': 0.19348515570163727, 'timestamp': '2025-09-30 22:14:40.030231', 'step': 2021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.063673', 'step': 2021, 'epoch': 1} {'type': 'loss', 'content': 0.17278750240802765, 'timestamp': '2025-09-30 22:14:40.066083', 'step': 2022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.096242', 'step': 2022, 'epoch': 1} {'type': 'loss', 'content': 0.23573850095272064, 'timestamp': '2025-09-30 22:14:40.099300', 'step': 2023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:40.131793', 'step': 2023, 'epoch': 1} {'type': 'loss', 'content': 0.1507398784160614, 'timestamp': '2025-09-30 22:14:40.157287', 'step': 2024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:40.187944', 'step': 2024, 'epoch': 1} {'type': 'loss', 'content': 0.23831836879253387, 'timestamp': '2025-09-30 22:14:40.190571', 'step': 2025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:40.221651', 'step': 2025, 'epoch': 1} {'type': 'loss', 'content': 0.2288508415222168, 'timestamp': '2025-09-30 22:14:40.223996', 'step': 2026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.254662', 'step': 2026, 'epoch': 1} {'type': 'loss', 'content': 0.16497430205345154, 'timestamp': '2025-09-30 22:14:40.257330', 'step': 2027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.287930', 'step': 2027, 'epoch': 1} {'type': 'loss', 'content': 0.12600620090961456, 'timestamp': '2025-09-30 22:14:40.311604', 'step': 2028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.342404', 'step': 2028, 'epoch': 1} {'type': 'loss', 'content': 0.15632264316082, 'timestamp': '2025-09-30 22:14:40.345090', 'step': 2029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.375032', 'step': 2029, 'epoch': 1} {'type': 'loss', 'content': 0.17291320860385895, 'timestamp': '2025-09-30 22:14:40.377779', 'step': 2030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:40.408021', 'step': 2030, 'epoch': 1} {'type': 'loss', 'content': 0.1191936582326889, 'timestamp': '2025-09-30 22:14:40.410255', 'step': 2031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:40.441242', 'step': 2031, 'epoch': 1} {'type': 'loss', 'content': 0.14376191794872284, 'timestamp': '2025-09-30 22:14:40.466080', 'step': 2032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.497509', 'step': 2032, 'epoch': 1} {'type': 'loss', 'content': 0.2950476109981537, 'timestamp': '2025-09-30 22:14:40.500043', 'step': 2033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:40.530598', 'step': 2033, 'epoch': 1} {'type': 'loss', 'content': 0.14951419830322266, 'timestamp': '2025-09-30 22:14:40.533675', 'step': 2034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:40.564764', 'step': 2034, 'epoch': 1} {'type': 'loss', 'content': 0.10191473364830017, 'timestamp': '2025-09-30 22:14:40.567350', 'step': 2035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:40.597675', 'step': 2035, 'epoch': 1} {'type': 'loss', 'content': 0.2609076499938965, 'timestamp': '2025-09-30 22:14:40.629266', 'step': 2036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.659419', 'step': 2036, 'epoch': 1} {'type': 'loss', 'content': 0.15662330389022827, 'timestamp': '2025-09-30 22:14:40.661664', 'step': 2037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.692561', 'step': 2037, 'epoch': 1} {'type': 'loss', 'content': 0.2787724435329437, 'timestamp': '2025-09-30 22:14:40.695003', 'step': 2038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.725714', 'step': 2038, 'epoch': 1} {'type': 'loss', 'content': 0.2241068333387375, 'timestamp': '2025-09-30 22:14:40.728226', 'step': 2039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:40.758563', 'step': 2039, 'epoch': 1} {'type': 'loss', 'content': 0.14349837601184845, 'timestamp': '2025-09-30 22:14:40.782546', 'step': 2040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:40.812560', 'step': 2040, 'epoch': 1} {'type': 'loss', 'content': 0.18017807602882385, 'timestamp': '2025-09-30 22:14:40.814824', 'step': 2041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:40.845753', 'step': 2041, 'epoch': 1} {'type': 'loss', 'content': 0.1311459243297577, 'timestamp': '2025-09-30 22:14:40.847663', 'step': 2042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:40.878115', 'step': 2042, 'epoch': 1} {'type': 'loss', 'content': 0.2132789045572281, 'timestamp': '2025-09-30 22:14:40.885587', 'step': 2043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:40.920929', 'step': 2043, 'epoch': 1} {'type': 'loss', 'content': 0.09695465117692947, 'timestamp': '2025-09-30 22:14:40.944380', 'step': 2044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:40.974714', 'step': 2044, 'epoch': 1} {'type': 'loss', 'content': 0.20476095378398895, 'timestamp': '2025-09-30 22:14:40.976913', 'step': 2045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:41.007292', 'step': 2045, 'epoch': 1} {'type': 'loss', 'content': 0.08294115960597992, 'timestamp': '2025-09-30 22:14:41.009555', 'step': 2046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:41.039688', 'step': 2046, 'epoch': 1} {'type': 'loss', 'content': 0.18064679205417633, 'timestamp': '2025-09-30 22:14:41.043243', 'step': 2047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.076082', 'step': 2047, 'epoch': 1} {'type': 'loss', 'content': 0.21685102581977844, 'timestamp': '2025-09-30 22:14:41.099875', 'step': 2048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:41.130275', 'step': 2048, 'epoch': 1} {'type': 'loss', 'content': 0.11740852147340775, 'timestamp': '2025-09-30 22:14:41.133229', 'step': 2049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.163275', 'step': 2049, 'epoch': 1} {'type': 'loss', 'content': 0.1808512657880783, 'timestamp': '2025-09-30 22:14:41.165907', 'step': 2050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.196014', 'step': 2050, 'epoch': 1} {'type': 'loss', 'content': 0.20493347942829132, 'timestamp': '2025-09-30 22:14:41.197923', 'step': 2051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:41.228643', 'step': 2051, 'epoch': 1} {'type': 'loss', 'content': 0.16255402565002441, 'timestamp': '2025-09-30 22:14:41.252113', 'step': 2052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.281811', 'step': 2052, 'epoch': 1} {'type': 'loss', 'content': 0.2399628460407257, 'timestamp': '2025-09-30 22:14:41.283808', 'step': 2053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:41.314302', 'step': 2053, 'epoch': 1} {'type': 'loss', 'content': 0.1444208174943924, 'timestamp': '2025-09-30 22:14:41.316629', 'step': 2054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:41.346822', 'step': 2054, 'epoch': 1} {'type': 'loss', 'content': 0.11956657469272614, 'timestamp': '2025-09-30 22:14:41.349079', 'step': 2055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:41.379107', 'step': 2055, 'epoch': 1} {'type': 'loss', 'content': 0.12820132076740265, 'timestamp': '2025-09-30 22:14:41.404132', 'step': 2056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:41.434273', 'step': 2056, 'epoch': 1} {'type': 'loss', 'content': 0.1287796050310135, 'timestamp': '2025-09-30 22:14:41.436450', 'step': 2057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:41.467097', 'step': 2057, 'epoch': 1} {'type': 'loss', 'content': 0.2251446545124054, 'timestamp': '2025-09-30 22:14:41.469907', 'step': 2058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:41.501423', 'step': 2058, 'epoch': 1} {'type': 'loss', 'content': 0.13099317252635956, 'timestamp': '2025-09-30 22:14:41.504067', 'step': 2059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:41.534322', 'step': 2059, 'epoch': 1} {'type': 'loss', 'content': 0.12156327813863754, 'timestamp': '2025-09-30 22:14:41.557945', 'step': 2060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.589131', 'step': 2060, 'epoch': 1} {'type': 'loss', 'content': 0.14568492770195007, 'timestamp': '2025-09-30 22:14:41.591614', 'step': 2061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.635187', 'step': 2061, 'epoch': 1} {'type': 'loss', 'content': 0.13922160863876343, 'timestamp': '2025-09-30 22:14:41.637956', 'step': 2062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.668000', 'step': 2062, 'epoch': 1} {'type': 'loss', 'content': 0.18966034054756165, 'timestamp': '2025-09-30 22:14:41.670346', 'step': 2063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:41.700553', 'step': 2063, 'epoch': 1} {'type': 'loss', 'content': 0.2540440857410431, 'timestamp': '2025-09-30 22:14:41.724375', 'step': 2064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:41.754625', 'step': 2064, 'epoch': 1} {'type': 'loss', 'content': 0.1672619730234146, 'timestamp': '2025-09-30 22:14:41.758225', 'step': 2065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:41.792164', 'step': 2065, 'epoch': 1} {'type': 'loss', 'content': 0.15601502358913422, 'timestamp': '2025-09-30 22:14:41.794685', 'step': 2066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:41.825716', 'step': 2066, 'epoch': 1} {'type': 'loss', 'content': 0.18733777105808258, 'timestamp': '2025-09-30 22:14:41.827872', 'step': 2067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:41.858410', 'step': 2067, 'epoch': 1} {'type': 'loss', 'content': 0.15400385856628418, 'timestamp': '2025-09-30 22:14:41.882445', 'step': 2068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:41.914213', 'step': 2068, 'epoch': 1} {'type': 'loss', 'content': 0.2090452015399933, 'timestamp': '2025-09-30 22:14:41.916663', 'step': 2069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.947707', 'step': 2069, 'epoch': 1} {'type': 'loss', 'content': 0.16611497104167938, 'timestamp': '2025-09-30 22:14:41.950161', 'step': 2070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:41.980624', 'step': 2070, 'epoch': 1} {'type': 'loss', 'content': 0.23917317390441895, 'timestamp': '2025-09-30 22:14:41.982576', 'step': 2071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.013594', 'step': 2071, 'epoch': 1} {'type': 'loss', 'content': 0.2001512348651886, 'timestamp': '2025-09-30 22:14:42.037294', 'step': 2072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.067484', 'step': 2072, 'epoch': 1} {'type': 'loss', 'content': 0.14859625697135925, 'timestamp': '2025-09-30 22:14:42.069979', 'step': 2073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:42.101643', 'step': 2073, 'epoch': 1} {'type': 'loss', 'content': 0.13697925209999084, 'timestamp': '2025-09-30 22:14:42.103936', 'step': 2074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.135762', 'step': 2074, 'epoch': 1} {'type': 'loss', 'content': 0.12949542701244354, 'timestamp': '2025-09-30 22:14:42.138138', 'step': 2075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:42.168739', 'step': 2075, 'epoch': 1} {'type': 'loss', 'content': 0.12900066375732422, 'timestamp': '2025-09-30 22:14:42.192797', 'step': 2076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.223168', 'step': 2076, 'epoch': 1} {'type': 'loss', 'content': 0.28980398178100586, 'timestamp': '2025-09-30 22:14:42.225377', 'step': 2077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.255957', 'step': 2077, 'epoch': 1} {'type': 'loss', 'content': 0.2088201940059662, 'timestamp': '2025-09-30 22:14:42.257990', 'step': 2078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:42.288342', 'step': 2078, 'epoch': 1} {'type': 'loss', 'content': 0.13472683727741241, 'timestamp': '2025-09-30 22:14:42.291344', 'step': 2079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.321282', 'step': 2079, 'epoch': 1} {'type': 'loss', 'content': 0.18762977421283722, 'timestamp': '2025-09-30 22:14:42.345068', 'step': 2080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.375272', 'step': 2080, 'epoch': 1} {'type': 'loss', 'content': 0.18794143199920654, 'timestamp': '2025-09-30 22:14:42.377246', 'step': 2081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.407772', 'step': 2081, 'epoch': 1} {'type': 'loss', 'content': 0.12273720651865005, 'timestamp': '2025-09-30 22:14:42.410100', 'step': 2082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.440677', 'step': 2082, 'epoch': 1} {'type': 'loss', 'content': 0.14729753136634827, 'timestamp': '2025-09-30 22:14:42.446067', 'step': 2083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.480787', 'step': 2083, 'epoch': 1} {'type': 'loss', 'content': 0.1220858171582222, 'timestamp': '2025-09-30 22:14:42.504269', 'step': 2084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:42.534532', 'step': 2084, 'epoch': 1} {'type': 'loss', 'content': 0.1405622810125351, 'timestamp': '2025-09-30 22:14:42.537613', 'step': 2085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.570810', 'step': 2085, 'epoch': 1} {'type': 'loss', 'content': 0.1288018375635147, 'timestamp': '2025-09-30 22:14:42.573241', 'step': 2086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.610018', 'step': 2086, 'epoch': 1} {'type': 'loss', 'content': 0.21113552153110504, 'timestamp': '2025-09-30 22:14:42.612244', 'step': 2087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.643588', 'step': 2087, 'epoch': 1} {'type': 'loss', 'content': 0.11888879537582397, 'timestamp': '2025-09-30 22:14:42.667687', 'step': 2088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.700303', 'step': 2088, 'epoch': 1} {'type': 'loss', 'content': 0.17014719545841217, 'timestamp': '2025-09-30 22:14:42.704633', 'step': 2089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.736941', 'step': 2089, 'epoch': 1} {'type': 'loss', 'content': 0.12248222529888153, 'timestamp': '2025-09-30 22:14:42.739820', 'step': 2090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.772715', 'step': 2090, 'epoch': 1} {'type': 'loss', 'content': 0.20367196202278137, 'timestamp': '2025-09-30 22:14:42.775744', 'step': 2091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.807446', 'step': 2091, 'epoch': 1} {'type': 'loss', 'content': 0.21015828847885132, 'timestamp': '2025-09-30 22:14:42.832098', 'step': 2092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:42.861773', 'step': 2092, 'epoch': 1} {'type': 'loss', 'content': 0.284165620803833, 'timestamp': '2025-09-30 22:14:42.863929', 'step': 2093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.894480', 'step': 2093, 'epoch': 1} {'type': 'loss', 'content': 0.17411337792873383, 'timestamp': '2025-09-30 22:14:42.899508', 'step': 2094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:42.930005', 'step': 2094, 'epoch': 1} {'type': 'loss', 'content': 0.12954217195510864, 'timestamp': '2025-09-30 22:14:42.934136', 'step': 2095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:42.966942', 'step': 2095, 'epoch': 1} {'type': 'loss', 'content': 0.11687298864126205, 'timestamp': '2025-09-30 22:14:42.992378', 'step': 2096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:43.024822', 'step': 2096, 'epoch': 1} {'type': 'loss', 'content': 0.1872626543045044, 'timestamp': '2025-09-30 22:14:43.028413', 'step': 2097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:43.061572', 'step': 2097, 'epoch': 1} {'type': 'loss', 'content': 0.2527858316898346, 'timestamp': '2025-09-30 22:14:43.074667', 'step': 2098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.108794', 'step': 2098, 'epoch': 1} {'type': 'loss', 'content': 0.17120511829853058, 'timestamp': '2025-09-30 22:14:43.113029', 'step': 2099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:43.146311', 'step': 2099, 'epoch': 1} {'type': 'loss', 'content': 0.15585023164749146, 'timestamp': '2025-09-30 22:14:43.171458', 'step': 2100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:43.202142', 'step': 2100, 'epoch': 1} {'type': 'loss', 'content': 0.17685052752494812, 'timestamp': '2025-09-30 22:14:43.207263', 'step': 2101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:43.243143', 'step': 2101, 'epoch': 1} {'type': 'loss', 'content': 0.21868883073329926, 'timestamp': '2025-09-30 22:14:43.249876', 'step': 2102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:43.287549', 'step': 2102, 'epoch': 1} {'type': 'loss', 'content': 0.1832035630941391, 'timestamp': '2025-09-30 22:14:43.291374', 'step': 2103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:43.322528', 'step': 2103, 'epoch': 1} {'type': 'loss', 'content': 0.15548960864543915, 'timestamp': '2025-09-30 22:14:43.347298', 'step': 2104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.378893', 'step': 2104, 'epoch': 1} {'type': 'loss', 'content': 0.14521847665309906, 'timestamp': '2025-09-30 22:14:43.382707', 'step': 2105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.416809', 'step': 2105, 'epoch': 1} {'type': 'loss', 'content': 0.19470249116420746, 'timestamp': '2025-09-30 22:14:43.420614', 'step': 2106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:43.451570', 'step': 2106, 'epoch': 1} {'type': 'loss', 'content': 0.17309658229351044, 'timestamp': '2025-09-30 22:14:43.454117', 'step': 2107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:43.487900', 'step': 2107, 'epoch': 1} {'type': 'loss', 'content': 0.20363843441009521, 'timestamp': '2025-09-30 22:14:43.517736', 'step': 2108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:43.549074', 'step': 2108, 'epoch': 1} {'type': 'loss', 'content': 0.19497814774513245, 'timestamp': '2025-09-30 22:14:43.552474', 'step': 2109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.589737', 'step': 2109, 'epoch': 1} {'type': 'loss', 'content': 0.11577540636062622, 'timestamp': '2025-09-30 22:14:43.592059', 'step': 2110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:43.626758', 'step': 2110, 'epoch': 1} {'type': 'loss', 'content': 0.21286378800868988, 'timestamp': '2025-09-30 22:14:43.629808', 'step': 2111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:43.660705', 'step': 2111, 'epoch': 1} {'type': 'loss', 'content': 0.16764052212238312, 'timestamp': '2025-09-30 22:14:43.688524', 'step': 2112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.724001', 'step': 2112, 'epoch': 1} {'type': 'loss', 'content': 0.1481291800737381, 'timestamp': '2025-09-30 22:14:43.727554', 'step': 2113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.759339', 'step': 2113, 'epoch': 1} {'type': 'loss', 'content': 0.16497159004211426, 'timestamp': '2025-09-30 22:14:43.762247', 'step': 2114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:43.793200', 'step': 2114, 'epoch': 1} {'type': 'loss', 'content': 0.31660789251327515, 'timestamp': '2025-09-30 22:14:43.796941', 'step': 2115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:43.827469', 'step': 2115, 'epoch': 1} {'type': 'loss', 'content': 0.24148774147033691, 'timestamp': '2025-09-30 22:14:43.851780', 'step': 2116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:43.882761', 'step': 2116, 'epoch': 1} {'type': 'loss', 'content': 0.17513984441757202, 'timestamp': '2025-09-30 22:14:43.886876', 'step': 2117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:43.923880', 'step': 2117, 'epoch': 1} {'type': 'loss', 'content': 0.14777478575706482, 'timestamp': '2025-09-30 22:14:43.932548', 'step': 2118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:43.965562', 'step': 2118, 'epoch': 1} {'type': 'loss', 'content': 0.2170991599559784, 'timestamp': '2025-09-30 22:14:43.968470', 'step': 2119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.003098', 'step': 2119, 'epoch': 1} {'type': 'loss', 'content': 0.162981778383255, 'timestamp': '2025-09-30 22:14:44.027716', 'step': 2120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.058041', 'step': 2120, 'epoch': 1} {'type': 'loss', 'content': 0.1378791630268097, 'timestamp': '2025-09-30 22:14:44.061965', 'step': 2121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:44.093018', 'step': 2121, 'epoch': 1} {'type': 'loss', 'content': 0.17612908780574799, 'timestamp': '2025-09-30 22:14:44.097017', 'step': 2122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.128705', 'step': 2122, 'epoch': 1} {'type': 'loss', 'content': 0.174452006816864, 'timestamp': '2025-09-30 22:14:44.131546', 'step': 2123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:44.163612', 'step': 2123, 'epoch': 1} {'type': 'loss', 'content': 0.13713127374649048, 'timestamp': '2025-09-30 22:14:44.188108', 'step': 2124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:44.236645', 'step': 2124, 'epoch': 1} {'type': 'loss', 'content': 0.1761324256658554, 'timestamp': '2025-09-30 22:14:44.247341', 'step': 2125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.278894', 'step': 2125, 'epoch': 1} {'type': 'loss', 'content': 0.16619624197483063, 'timestamp': '2025-09-30 22:14:44.281746', 'step': 2126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.312424', 'step': 2126, 'epoch': 1} {'type': 'loss', 'content': 0.12828552722930908, 'timestamp': '2025-09-30 22:14:44.314818', 'step': 2127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.345858', 'step': 2127, 'epoch': 1} {'type': 'loss', 'content': 0.28920137882232666, 'timestamp': '2025-09-30 22:14:44.369635', 'step': 2128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:44.400523', 'step': 2128, 'epoch': 1} {'type': 'loss', 'content': 0.22581720352172852, 'timestamp': '2025-09-30 22:14:44.409112', 'step': 2129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.440442', 'step': 2129, 'epoch': 1} {'type': 'loss', 'content': 0.13723479211330414, 'timestamp': '2025-09-30 22:14:44.444058', 'step': 2130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.475217', 'step': 2130, 'epoch': 1} {'type': 'loss', 'content': 0.14743797481060028, 'timestamp': '2025-09-30 22:14:44.478340', 'step': 2131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.509427', 'step': 2131, 'epoch': 1} {'type': 'loss', 'content': 0.22970272600650787, 'timestamp': '2025-09-30 22:14:44.534500', 'step': 2132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:44.565412', 'step': 2132, 'epoch': 1} {'type': 'loss', 'content': 0.12468994408845901, 'timestamp': '2025-09-30 22:14:44.568608', 'step': 2133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.600627', 'step': 2133, 'epoch': 1} {'type': 'loss', 'content': 0.1115139052271843, 'timestamp': '2025-09-30 22:14:44.607093', 'step': 2134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:44.639883', 'step': 2134, 'epoch': 1} {'type': 'loss', 'content': 0.34847748279571533, 'timestamp': '2025-09-30 22:14:44.645378', 'step': 2135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.676419', 'step': 2135, 'epoch': 1} {'type': 'loss', 'content': 0.09042714536190033, 'timestamp': '2025-09-30 22:14:44.701208', 'step': 2136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.733837', 'step': 2136, 'epoch': 1} {'type': 'loss', 'content': 0.16914746165275574, 'timestamp': '2025-09-30 22:14:44.741732', 'step': 2137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:44.778861', 'step': 2137, 'epoch': 1} {'type': 'loss', 'content': 0.2051287144422531, 'timestamp': '2025-09-30 22:14:44.781825', 'step': 2138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:44.815616', 'step': 2138, 'epoch': 1} {'type': 'loss', 'content': 0.08667527139186859, 'timestamp': '2025-09-30 22:14:44.819259', 'step': 2139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.850687', 'step': 2139, 'epoch': 1} {'type': 'loss', 'content': 0.16180534660816193, 'timestamp': '2025-09-30 22:14:44.882849', 'step': 2140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:44.913893', 'step': 2140, 'epoch': 1} {'type': 'loss', 'content': 0.19047911465168, 'timestamp': '2025-09-30 22:14:44.923487', 'step': 2141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:44.955050', 'step': 2141, 'epoch': 1} {'type': 'loss', 'content': 0.12150351703166962, 'timestamp': '2025-09-30 22:14:44.958822', 'step': 2142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:44.989345', 'step': 2142, 'epoch': 1} {'type': 'loss', 'content': 0.19936494529247284, 'timestamp': '2025-09-30 22:14:44.992857', 'step': 2143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.028162', 'step': 2143, 'epoch': 1} {'type': 'loss', 'content': 0.16749359667301178, 'timestamp': '2025-09-30 22:14:45.052884', 'step': 2144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:45.084469', 'step': 2144, 'epoch': 1} {'type': 'loss', 'content': 0.15116915106773376, 'timestamp': '2025-09-30 22:14:45.087115', 'step': 2145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.120550', 'step': 2145, 'epoch': 1} {'type': 'loss', 'content': 0.15376079082489014, 'timestamp': '2025-09-30 22:14:45.123589', 'step': 2146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.155826', 'step': 2146, 'epoch': 1} {'type': 'loss', 'content': 0.1295701563358307, 'timestamp': '2025-09-30 22:14:45.159229', 'step': 2147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.196382', 'step': 2147, 'epoch': 1} {'type': 'loss', 'content': 0.16243457794189453, 'timestamp': '2025-09-30 22:14:45.226546', 'step': 2148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.256916', 'step': 2148, 'epoch': 1} {'type': 'loss', 'content': 0.14099760353565216, 'timestamp': '2025-09-30 22:14:45.259843', 'step': 2149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.292009', 'step': 2149, 'epoch': 1} {'type': 'loss', 'content': 0.12365642189979553, 'timestamp': '2025-09-30 22:14:45.294523', 'step': 2150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.332440', 'step': 2150, 'epoch': 1} {'type': 'loss', 'content': 0.26393070816993713, 'timestamp': '2025-09-30 22:14:45.335255', 'step': 2151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:45.367181', 'step': 2151, 'epoch': 1} {'type': 'loss', 'content': 0.15428711473941803, 'timestamp': '2025-09-30 22:14:45.391255', 'step': 2152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:45.421674', 'step': 2152, 'epoch': 1} {'type': 'loss', 'content': 0.10010459274053574, 'timestamp': '2025-09-30 22:14:45.425613', 'step': 2153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:45.456508', 'step': 2153, 'epoch': 1} {'type': 'loss', 'content': 0.14999812841415405, 'timestamp': '2025-09-30 22:14:45.460028', 'step': 2154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:45.493469', 'step': 2154, 'epoch': 1} {'type': 'loss', 'content': 0.1629745364189148, 'timestamp': '2025-09-30 22:14:45.502240', 'step': 2155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.535768', 'step': 2155, 'epoch': 1} {'type': 'loss', 'content': 0.2260979562997818, 'timestamp': '2025-09-30 22:14:45.560556', 'step': 2156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.591284', 'step': 2156, 'epoch': 1} {'type': 'loss', 'content': 0.20829570293426514, 'timestamp': '2025-09-30 22:14:45.593964', 'step': 2157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:45.625406', 'step': 2157, 'epoch': 1} {'type': 'loss', 'content': 0.12953373789787292, 'timestamp': '2025-09-30 22:14:45.628664', 'step': 2158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.660391', 'step': 2158, 'epoch': 1} {'type': 'loss', 'content': 0.26012951135635376, 'timestamp': '2025-09-30 22:14:45.673762', 'step': 2159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.713510', 'step': 2159, 'epoch': 1} {'type': 'loss', 'content': 0.1584208756685257, 'timestamp': '2025-09-30 22:14:45.738923', 'step': 2160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.770346', 'step': 2160, 'epoch': 1} {'type': 'loss', 'content': 0.16355441510677338, 'timestamp': '2025-09-30 22:14:45.774071', 'step': 2161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:45.805878', 'step': 2161, 'epoch': 1} {'type': 'loss', 'content': 0.2520352303981781, 'timestamp': '2025-09-30 22:14:45.815806', 'step': 2162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:45.855486', 'step': 2162, 'epoch': 1} {'type': 'loss', 'content': 0.21355055272579193, 'timestamp': '2025-09-30 22:14:45.859630', 'step': 2163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.890157', 'step': 2163, 'epoch': 1} {'type': 'loss', 'content': 0.20484694838523865, 'timestamp': '2025-09-30 22:14:45.916294', 'step': 2164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:45.947472', 'step': 2164, 'epoch': 1} {'type': 'loss', 'content': 0.2055094689130783, 'timestamp': '2025-09-30 22:14:45.950451', 'step': 2165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:45.983658', 'step': 2165, 'epoch': 1} {'type': 'loss', 'content': 0.11760782450437546, 'timestamp': '2025-09-30 22:14:45.987980', 'step': 2166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:46.018757', 'step': 2166, 'epoch': 1} {'type': 'loss', 'content': 0.1632254421710968, 'timestamp': '2025-09-30 22:14:46.021676', 'step': 2167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.053112', 'step': 2167, 'epoch': 1} {'type': 'loss', 'content': 0.2811751961708069, 'timestamp': '2025-09-30 22:14:46.077433', 'step': 2168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.108462', 'step': 2168, 'epoch': 1} {'type': 'loss', 'content': 0.1992134302854538, 'timestamp': '2025-09-30 22:14:46.111807', 'step': 2169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:46.143298', 'step': 2169, 'epoch': 1} {'type': 'loss', 'content': 0.15658576786518097, 'timestamp': '2025-09-30 22:14:46.149590', 'step': 2170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:46.182648', 'step': 2170, 'epoch': 1} {'type': 'loss', 'content': 0.20914950966835022, 'timestamp': '2025-09-30 22:14:46.185478', 'step': 2171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:46.221556', 'step': 2171, 'epoch': 1} {'type': 'loss', 'content': 0.26552945375442505, 'timestamp': '2025-09-30 22:14:46.245916', 'step': 2172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.279631', 'step': 2172, 'epoch': 1} {'type': 'loss', 'content': 0.2140212059020996, 'timestamp': '2025-09-30 22:14:46.283508', 'step': 2173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:46.314445', 'step': 2173, 'epoch': 1} {'type': 'loss', 'content': 0.20162729918956757, 'timestamp': '2025-09-30 22:14:46.317350', 'step': 2174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:46.353742', 'step': 2174, 'epoch': 1} {'type': 'loss', 'content': 0.23602008819580078, 'timestamp': '2025-09-30 22:14:46.357101', 'step': 2175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:46.389756', 'step': 2175, 'epoch': 1} {'type': 'loss', 'content': 0.1725224107503891, 'timestamp': '2025-09-30 22:14:46.414970', 'step': 2176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:46.449163', 'step': 2176, 'epoch': 1} {'type': 'loss', 'content': 0.14141586422920227, 'timestamp': '2025-09-30 22:14:46.453783', 'step': 2177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.486873', 'step': 2177, 'epoch': 1} {'type': 'loss', 'content': 0.11161714047193527, 'timestamp': '2025-09-30 22:14:46.495808', 'step': 2178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.526743', 'step': 2178, 'epoch': 1} {'type': 'loss', 'content': 0.10832986980676651, 'timestamp': '2025-09-30 22:14:46.530030', 'step': 2179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:46.561864', 'step': 2179, 'epoch': 1} {'type': 'loss', 'content': 0.22875821590423584, 'timestamp': '2025-09-30 22:14:46.586851', 'step': 2180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:46.621996', 'step': 2180, 'epoch': 1} {'type': 'loss', 'content': 0.15693430602550507, 'timestamp': '2025-09-30 22:14:46.630830', 'step': 2181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.666504', 'step': 2181, 'epoch': 1} {'type': 'loss', 'content': 0.3023509979248047, 'timestamp': '2025-09-30 22:14:46.669711', 'step': 2182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.706696', 'step': 2182, 'epoch': 1} {'type': 'loss', 'content': 0.12717370688915253, 'timestamp': '2025-09-30 22:14:46.708986', 'step': 2183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.741745', 'step': 2183, 'epoch': 1} {'type': 'loss', 'content': 0.24131594598293304, 'timestamp': '2025-09-30 22:14:46.770546', 'step': 2184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.800844', 'step': 2184, 'epoch': 1} {'type': 'loss', 'content': 0.2812662422657013, 'timestamp': '2025-09-30 22:14:46.803496', 'step': 2185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:46.835168', 'step': 2185, 'epoch': 1} {'type': 'loss', 'content': 0.20574502646923065, 'timestamp': '2025-09-30 22:14:46.838998', 'step': 2186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.869705', 'step': 2186, 'epoch': 1} {'type': 'loss', 'content': 0.17200738191604614, 'timestamp': '2025-09-30 22:14:46.872300', 'step': 2187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:46.904394', 'step': 2187, 'epoch': 1} {'type': 'loss', 'content': 0.2349710613489151, 'timestamp': '2025-09-30 22:14:46.929794', 'step': 2188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:46.959790', 'step': 2188, 'epoch': 1} {'type': 'loss', 'content': 0.17392392456531525, 'timestamp': '2025-09-30 22:14:46.962642', 'step': 2189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:46.994161', 'step': 2189, 'epoch': 1} {'type': 'loss', 'content': 0.15525272488594055, 'timestamp': '2025-09-30 22:14:47.002470', 'step': 2190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:47.036633', 'step': 2190, 'epoch': 1} {'type': 'loss', 'content': 0.15616895258426666, 'timestamp': '2025-09-30 22:14:47.039793', 'step': 2191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.073991', 'step': 2191, 'epoch': 1} {'type': 'loss', 'content': 0.1635034829378128, 'timestamp': '2025-09-30 22:14:47.110334', 'step': 2192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.150594', 'step': 2192, 'epoch': 1} {'type': 'loss', 'content': 0.16706135869026184, 'timestamp': '2025-09-30 22:14:47.153539', 'step': 2193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.185172', 'step': 2193, 'epoch': 1} {'type': 'loss', 'content': 0.17650003731250763, 'timestamp': '2025-09-30 22:14:47.193882', 'step': 2194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.226547', 'step': 2194, 'epoch': 1} {'type': 'loss', 'content': 0.19827218353748322, 'timestamp': '2025-09-30 22:14:47.229074', 'step': 2195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:47.266747', 'step': 2195, 'epoch': 1} {'type': 'loss', 'content': 0.19310013949871063, 'timestamp': '2025-09-30 22:14:47.296327', 'step': 2196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.326698', 'step': 2196, 'epoch': 1} {'type': 'loss', 'content': 0.13106968998908997, 'timestamp': '2025-09-30 22:14:47.338319', 'step': 2197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:47.374495', 'step': 2197, 'epoch': 1} {'type': 'loss', 'content': 0.08239457756280899, 'timestamp': '2025-09-30 22:14:47.378038', 'step': 2198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.413313', 'step': 2198, 'epoch': 1} {'type': 'loss', 'content': 0.18669718503952026, 'timestamp': '2025-09-30 22:14:47.415489', 'step': 2199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:47.446271', 'step': 2199, 'epoch': 1} {'type': 'loss', 'content': 0.24198266863822937, 'timestamp': '2025-09-30 22:14:47.470913', 'step': 2200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.507279', 'step': 2200, 'epoch': 1} {'type': 'loss', 'content': 0.10703623294830322, 'timestamp': '2025-09-30 22:14:47.515412', 'step': 2201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:14:47.554437', 'step': 2201, 'epoch': 1} {'type': 'loss', 'content': 0.1927982121706009, 'timestamp': '2025-09-30 22:14:47.559084', 'step': 2202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.590722', 'step': 2202, 'epoch': 1} {'type': 'loss', 'content': 0.16517359018325806, 'timestamp': '2025-09-30 22:14:47.593922', 'step': 2203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.636164', 'step': 2203, 'epoch': 1} {'type': 'loss', 'content': 0.09430710971355438, 'timestamp': '2025-09-30 22:14:47.661278', 'step': 2204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.692277', 'step': 2204, 'epoch': 1} {'type': 'loss', 'content': 0.11545757204294205, 'timestamp': '2025-09-30 22:14:47.695749', 'step': 2205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:47.726660', 'step': 2205, 'epoch': 1} {'type': 'loss', 'content': 0.27725949883461, 'timestamp': '2025-09-30 22:14:47.736509', 'step': 2206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:47.772171', 'step': 2206, 'epoch': 1} {'type': 'loss', 'content': 0.15826953947544098, 'timestamp': '2025-09-30 22:14:47.780429', 'step': 2207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:47.812567', 'step': 2207, 'epoch': 1} {'type': 'loss', 'content': 0.2518770396709442, 'timestamp': '2025-09-30 22:14:47.836463', 'step': 2208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:47.872167', 'step': 2208, 'epoch': 1} {'type': 'loss', 'content': 0.12643511593341827, 'timestamp': '2025-09-30 22:14:47.876504', 'step': 2209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.907471', 'step': 2209, 'epoch': 1} {'type': 'loss', 'content': 0.18763436377048492, 'timestamp': '2025-09-30 22:14:47.915855', 'step': 2210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.952007', 'step': 2210, 'epoch': 1} {'type': 'loss', 'content': 0.31470996141433716, 'timestamp': '2025-09-30 22:14:47.955134', 'step': 2211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:47.986376', 'step': 2211, 'epoch': 1} {'type': 'loss', 'content': 0.3505728542804718, 'timestamp': '2025-09-30 22:14:48.017067', 'step': 2212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.061623', 'step': 2212, 'epoch': 1} {'type': 'loss', 'content': 0.0821753665804863, 'timestamp': '2025-09-30 22:14:48.064689', 'step': 2213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:48.099680', 'step': 2213, 'epoch': 1} {'type': 'loss', 'content': 0.21219372749328613, 'timestamp': '2025-09-30 22:14:48.110895', 'step': 2214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:48.142469', 'step': 2214, 'epoch': 1} {'type': 'loss', 'content': 0.17904432117938995, 'timestamp': '2025-09-30 22:14:48.148755', 'step': 2215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.188660', 'step': 2215, 'epoch': 1} {'type': 'loss', 'content': 0.21836306154727936, 'timestamp': '2025-09-30 22:14:48.214055', 'step': 2216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:48.245782', 'step': 2216, 'epoch': 1} {'type': 'loss', 'content': 0.10244496911764145, 'timestamp': '2025-09-30 22:14:48.253964', 'step': 2217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.287672', 'step': 2217, 'epoch': 1} {'type': 'loss', 'content': 0.24877814948558807, 'timestamp': '2025-09-30 22:14:48.295677', 'step': 2218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:48.334111', 'step': 2218, 'epoch': 1} {'type': 'loss', 'content': 0.1300468146800995, 'timestamp': '2025-09-30 22:14:48.343898', 'step': 2219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:14:48.386008', 'step': 2219, 'epoch': 1} {'type': 'loss', 'content': 0.16408416628837585, 'timestamp': '2025-09-30 22:14:48.411381', 'step': 2220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:48.452920', 'step': 2220, 'epoch': 1} {'type': 'loss', 'content': 0.17670950293540955, 'timestamp': '2025-09-30 22:14:48.459187', 'step': 2221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.491165', 'step': 2221, 'epoch': 1} {'type': 'loss', 'content': 0.12963257730007172, 'timestamp': '2025-09-30 22:14:48.494542', 'step': 2222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.533278', 'step': 2222, 'epoch': 1} {'type': 'loss', 'content': 0.23963424563407898, 'timestamp': '2025-09-30 22:14:48.540096', 'step': 2223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:48.576976', 'step': 2223, 'epoch': 1} {'type': 'loss', 'content': 0.224950909614563, 'timestamp': '2025-09-30 22:14:48.603854', 'step': 2224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:48.638322', 'step': 2224, 'epoch': 1} {'type': 'loss', 'content': 0.19083791971206665, 'timestamp': '2025-09-30 22:14:48.642376', 'step': 2225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.674611', 'step': 2225, 'epoch': 1} {'type': 'loss', 'content': 0.21568208932876587, 'timestamp': '2025-09-30 22:14:48.678844', 'step': 2226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.710701', 'step': 2226, 'epoch': 1} {'type': 'loss', 'content': 0.2148791253566742, 'timestamp': '2025-09-30 22:14:48.714672', 'step': 2227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:48.745875', 'step': 2227, 'epoch': 1} {'type': 'loss', 'content': 0.13085561990737915, 'timestamp': '2025-09-30 22:14:48.770911', 'step': 2228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:48.801091', 'step': 2228, 'epoch': 1} {'type': 'loss', 'content': 0.17948724329471588, 'timestamp': '2025-09-30 22:14:48.804590', 'step': 2229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:48.835877', 'step': 2229, 'epoch': 1} {'type': 'loss', 'content': 0.20288947224617004, 'timestamp': '2025-09-30 22:14:48.838839', 'step': 2230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.870688', 'step': 2230, 'epoch': 1} {'type': 'loss', 'content': 0.14854587614536285, 'timestamp': '2025-09-30 22:14:48.874147', 'step': 2231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:48.906258', 'step': 2231, 'epoch': 1} {'type': 'loss', 'content': 0.15901345014572144, 'timestamp': '2025-09-30 22:14:48.930106', 'step': 2232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:48.966608', 'step': 2232, 'epoch': 1} {'type': 'loss', 'content': 0.08944214135408401, 'timestamp': '2025-09-30 22:14:48.976488', 'step': 2233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:49.007822', 'step': 2233, 'epoch': 1} {'type': 'loss', 'content': 0.18860915303230286, 'timestamp': '2025-09-30 22:14:49.011473', 'step': 2234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:49.047272', 'step': 2234, 'epoch': 1} {'type': 'loss', 'content': 0.19745586812496185, 'timestamp': '2025-09-30 22:14:49.051569', 'step': 2235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:49.089573', 'step': 2235, 'epoch': 1} {'type': 'loss', 'content': 0.16053837537765503, 'timestamp': '2025-09-30 22:14:49.123382', 'step': 2236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:49.155347', 'step': 2236, 'epoch': 1} {'type': 'loss', 'content': 0.3235865533351898, 'timestamp': '2025-09-30 22:14:49.159094', 'step': 2237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:49.191849', 'step': 2237, 'epoch': 1} {'type': 'loss', 'content': 0.1489059031009674, 'timestamp': '2025-09-30 22:14:49.195443', 'step': 2238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:49.226725', 'step': 2238, 'epoch': 1} {'type': 'loss', 'content': 0.15835753083229065, 'timestamp': '2025-09-30 22:14:49.239004', 'step': 2239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:49.269884', 'step': 2239, 'epoch': 1} {'type': 'loss', 'content': 0.09983498603105545, 'timestamp': '2025-09-30 22:14:49.295134', 'step': 2240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:49.326329', 'step': 2240, 'epoch': 1} {'type': 'loss', 'content': 0.2353704273700714, 'timestamp': '2025-09-30 22:14:49.329235', 'step': 2241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:49.369929', 'step': 2241, 'epoch': 1} {'type': 'loss', 'content': 0.18431761860847473, 'timestamp': '2025-09-30 22:14:49.373454', 'step': 2242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:49.416557', 'step': 2242, 'epoch': 1} {'type': 'loss', 'content': 0.23223000764846802, 'timestamp': '2025-09-30 22:14:49.422361', 'step': 2243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:49.453459', 'step': 2243, 'epoch': 1} {'type': 'loss', 'content': 0.10892900824546814, 'timestamp': '2025-09-30 22:14:49.489970', 'step': 2244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:49.530930', 'step': 2244, 'epoch': 1} {'type': 'loss', 'content': 0.23110702633857727, 'timestamp': '2025-09-30 22:14:49.545047', 'step': 2245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:49.576800', 'step': 2245, 'epoch': 1} {'type': 'loss', 'content': 0.1557312160730362, 'timestamp': '2025-09-30 22:14:49.581130', 'step': 2246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:49.613322', 'step': 2246, 'epoch': 1} {'type': 'loss', 'content': 0.23306754231452942, 'timestamp': '2025-09-30 22:14:49.617324', 'step': 2247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:49.652325', 'step': 2247, 'epoch': 1} {'type': 'loss', 'content': 0.1640544831752777, 'timestamp': '2025-09-30 22:14:49.685058', 'step': 2248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:49.718255', 'step': 2248, 'epoch': 1} {'type': 'loss', 'content': 0.15288151800632477, 'timestamp': '2025-09-30 22:14:49.725870', 'step': 2249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:49.765610', 'step': 2249, 'epoch': 1} {'type': 'loss', 'content': 0.17876680195331573, 'timestamp': '2025-09-30 22:14:49.773287', 'step': 2250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:49.809504', 'step': 2250, 'epoch': 1} {'type': 'loss', 'content': 0.13243962824344635, 'timestamp': '2025-09-30 22:14:49.817918', 'step': 2251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:49.855336', 'step': 2251, 'epoch': 1} {'type': 'loss', 'content': 0.22362951934337616, 'timestamp': '2025-09-30 22:14:49.880608', 'step': 2252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:49.912174', 'step': 2252, 'epoch': 1} {'type': 'loss', 'content': 0.16227756440639496, 'timestamp': '2025-09-30 22:14:49.915413', 'step': 2253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:49.956170', 'step': 2253, 'epoch': 1} {'type': 'loss', 'content': 0.1550697535276413, 'timestamp': '2025-09-30 22:14:49.959333', 'step': 2254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:14:49.990820', 'step': 2254, 'epoch': 1} {'type': 'loss', 'content': 0.18781155347824097, 'timestamp': '2025-09-30 22:14:49.996274', 'step': 2255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.027926', 'step': 2255, 'epoch': 1} {'type': 'loss', 'content': 0.14376986026763916, 'timestamp': '2025-09-30 22:14:50.052535', 'step': 2256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.083892', 'step': 2256, 'epoch': 1} {'type': 'loss', 'content': 0.20803087949752808, 'timestamp': '2025-09-30 22:14:50.086997', 'step': 2257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:50.122986', 'step': 2257, 'epoch': 1} {'type': 'loss', 'content': 0.10114289820194244, 'timestamp': '2025-09-30 22:14:50.134096', 'step': 2258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.166573', 'step': 2258, 'epoch': 1} {'type': 'loss', 'content': 0.259818434715271, 'timestamp': '2025-09-30 22:14:50.176565', 'step': 2259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.208350', 'step': 2259, 'epoch': 1} {'type': 'loss', 'content': 0.16186852753162384, 'timestamp': '2025-09-30 22:14:50.234067', 'step': 2260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.265329', 'step': 2260, 'epoch': 1} {'type': 'loss', 'content': 0.16921725869178772, 'timestamp': '2025-09-30 22:14:50.268367', 'step': 2261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:50.300012', 'step': 2261, 'epoch': 1} {'type': 'loss', 'content': 0.21247224509716034, 'timestamp': '2025-09-30 22:14:50.311651', 'step': 2262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.342742', 'step': 2262, 'epoch': 1} {'type': 'loss', 'content': 0.15760141611099243, 'timestamp': '2025-09-30 22:14:50.347800', 'step': 2263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:50.382929', 'step': 2263, 'epoch': 1} {'type': 'loss', 'content': 0.13922935724258423, 'timestamp': '2025-09-30 22:14:50.408568', 'step': 2264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.449760', 'step': 2264, 'epoch': 1} {'type': 'loss', 'content': 0.14187239110469818, 'timestamp': '2025-09-30 22:14:50.460847', 'step': 2265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.498796', 'step': 2265, 'epoch': 1} {'type': 'loss', 'content': 0.15542460978031158, 'timestamp': '2025-09-30 22:14:50.510374', 'step': 2266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.547931', 'step': 2266, 'epoch': 1} {'type': 'loss', 'content': 0.201261505484581, 'timestamp': '2025-09-30 22:14:50.554566', 'step': 2267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.593638', 'step': 2267, 'epoch': 1} {'type': 'loss', 'content': 0.12224423885345459, 'timestamp': '2025-09-30 22:14:50.617733', 'step': 2268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.654034', 'step': 2268, 'epoch': 1} {'type': 'loss', 'content': 0.27481910586357117, 'timestamp': '2025-09-30 22:14:50.657310', 'step': 2269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:50.689784', 'step': 2269, 'epoch': 1} {'type': 'loss', 'content': 0.21620406210422516, 'timestamp': '2025-09-30 22:14:50.693296', 'step': 2270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.724505', 'step': 2270, 'epoch': 1} {'type': 'loss', 'content': 0.1672198325395584, 'timestamp': '2025-09-30 22:14:50.731182', 'step': 2271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.767336', 'step': 2271, 'epoch': 1} {'type': 'loss', 'content': 0.1471605896949768, 'timestamp': '2025-09-30 22:14:50.791911', 'step': 2272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:50.828228', 'step': 2272, 'epoch': 1} {'type': 'loss', 'content': 0.2575627565383911, 'timestamp': '2025-09-30 22:14:50.835455', 'step': 2273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:50.869287', 'step': 2273, 'epoch': 1} {'type': 'loss', 'content': 0.1341443806886673, 'timestamp': '2025-09-30 22:14:50.875002', 'step': 2274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:50.905466', 'step': 2274, 'epoch': 1} {'type': 'loss', 'content': 0.22684235870838165, 'timestamp': '2025-09-30 22:14:50.912401', 'step': 2275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:50.947792', 'step': 2275, 'epoch': 1} {'type': 'loss', 'content': 0.2063121199607849, 'timestamp': '2025-09-30 22:14:50.977287', 'step': 2276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.009757', 'step': 2276, 'epoch': 1} {'type': 'loss', 'content': 0.1876576989889145, 'timestamp': '2025-09-30 22:14:51.016668', 'step': 2277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.048719', 'step': 2277, 'epoch': 1} {'type': 'loss', 'content': 0.24612879753112793, 'timestamp': '2025-09-30 22:14:51.055495', 'step': 2278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.088067', 'step': 2278, 'epoch': 1} {'type': 'loss', 'content': 0.24195155501365662, 'timestamp': '2025-09-30 22:14:51.095859', 'step': 2279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:51.127252', 'step': 2279, 'epoch': 1} {'type': 'loss', 'content': 0.18929530680179596, 'timestamp': '2025-09-30 22:14:51.157063', 'step': 2280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.189271', 'step': 2280, 'epoch': 1} {'type': 'loss', 'content': 0.2556532323360443, 'timestamp': '2025-09-30 22:14:51.192636', 'step': 2281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.223921', 'step': 2281, 'epoch': 1} {'type': 'loss', 'content': 0.19267290830612183, 'timestamp': '2025-09-30 22:14:51.230686', 'step': 2282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.267293', 'step': 2282, 'epoch': 1} {'type': 'loss', 'content': 0.17956817150115967, 'timestamp': '2025-09-30 22:14:51.274054', 'step': 2283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.304739', 'step': 2283, 'epoch': 1} {'type': 'loss', 'content': 0.20451481640338898, 'timestamp': '2025-09-30 22:14:51.331726', 'step': 2284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.362052', 'step': 2284, 'epoch': 1} {'type': 'loss', 'content': 0.1147875040769577, 'timestamp': '2025-09-30 22:14:51.371436', 'step': 2285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:51.407109', 'step': 2285, 'epoch': 1} {'type': 'loss', 'content': 0.11910317093133926, 'timestamp': '2025-09-30 22:14:51.412674', 'step': 2286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:51.446848', 'step': 2286, 'epoch': 1} {'type': 'loss', 'content': 0.1450430005788803, 'timestamp': '2025-09-30 22:14:51.450867', 'step': 2287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.482735', 'step': 2287, 'epoch': 1} {'type': 'loss', 'content': 0.17137698829174042, 'timestamp': '2025-09-30 22:14:51.507309', 'step': 2288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:14:51.545655', 'step': 2288, 'epoch': 1} {'type': 'loss', 'content': 0.25188198685646057, 'timestamp': '2025-09-30 22:14:51.548857', 'step': 2289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:51.581206', 'step': 2289, 'epoch': 1} {'type': 'loss', 'content': 0.21191053092479706, 'timestamp': '2025-09-30 22:14:51.585860', 'step': 2290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.617165', 'step': 2290, 'epoch': 1} {'type': 'loss', 'content': 0.22869589924812317, 'timestamp': '2025-09-30 22:14:51.621115', 'step': 2291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.662467', 'step': 2291, 'epoch': 1} {'type': 'loss', 'content': 0.17393797636032104, 'timestamp': '2025-09-30 22:14:51.695745', 'step': 2292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:51.730461', 'step': 2292, 'epoch': 1} {'type': 'loss', 'content': 0.1991657316684723, 'timestamp': '2025-09-30 22:14:51.734321', 'step': 2293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.766739', 'step': 2293, 'epoch': 1} {'type': 'loss', 'content': 0.18680444359779358, 'timestamp': '2025-09-30 22:14:51.771363', 'step': 2294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:51.803158', 'step': 2294, 'epoch': 1} {'type': 'loss', 'content': 0.24460233747959137, 'timestamp': '2025-09-30 22:14:51.810651', 'step': 2295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.841793', 'step': 2295, 'epoch': 1} {'type': 'loss', 'content': 0.1405254602432251, 'timestamp': '2025-09-30 22:14:51.870471', 'step': 2296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.903267', 'step': 2296, 'epoch': 1} {'type': 'loss', 'content': 0.19793488085269928, 'timestamp': '2025-09-30 22:14:51.906163', 'step': 2297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:51.950163', 'step': 2297, 'epoch': 1} {'type': 'loss', 'content': 0.108750119805336, 'timestamp': '2025-09-30 22:14:51.954398', 'step': 2298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:51.986502', 'step': 2298, 'epoch': 1} {'type': 'loss', 'content': 0.1757780760526657, 'timestamp': '2025-09-30 22:14:51.993818', 'step': 2299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:52.027438', 'step': 2299, 'epoch': 1} {'type': 'loss', 'content': 0.1816791296005249, 'timestamp': '2025-09-30 22:14:52.055936', 'step': 2300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.091166', 'step': 2300, 'epoch': 1} {'type': 'loss', 'content': 0.1494143307209015, 'timestamp': '2025-09-30 22:14:52.094142', 'step': 2301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.129942', 'step': 2301, 'epoch': 1} {'type': 'loss', 'content': 0.14960749447345734, 'timestamp': '2025-09-30 22:14:52.138236', 'step': 2302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:52.174153', 'step': 2302, 'epoch': 1} {'type': 'loss', 'content': 0.23960670828819275, 'timestamp': '2025-09-30 22:14:52.182722', 'step': 2303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.220211', 'step': 2303, 'epoch': 1} {'type': 'loss', 'content': 0.18768127262592316, 'timestamp': '2025-09-30 22:14:52.247609', 'step': 2304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:52.278511', 'step': 2304, 'epoch': 1} {'type': 'loss', 'content': 0.18024449050426483, 'timestamp': '2025-09-30 22:14:52.282154', 'step': 2305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:52.314942', 'step': 2305, 'epoch': 1} {'type': 'loss', 'content': 0.22020964324474335, 'timestamp': '2025-09-30 22:14:52.321350', 'step': 2306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.357437', 'step': 2306, 'epoch': 1} {'type': 'loss', 'content': 0.12178914994001389, 'timestamp': '2025-09-30 22:14:52.363997', 'step': 2307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.399529', 'step': 2307, 'epoch': 1} {'type': 'loss', 'content': 0.3532545566558838, 'timestamp': '2025-09-30 22:14:52.424378', 'step': 2308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:52.459659', 'step': 2308, 'epoch': 1} {'type': 'loss', 'content': 0.22464220225811005, 'timestamp': '2025-09-30 22:14:52.462212', 'step': 2309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.495480', 'step': 2309, 'epoch': 1} {'type': 'loss', 'content': 0.23071394860744476, 'timestamp': '2025-09-30 22:14:52.498526', 'step': 2310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:52.535164', 'step': 2310, 'epoch': 1} {'type': 'loss', 'content': 0.1064872145652771, 'timestamp': '2025-09-30 22:14:52.546341', 'step': 2311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:52.586002', 'step': 2311, 'epoch': 1} {'type': 'loss', 'content': 0.1740281730890274, 'timestamp': '2025-09-30 22:14:52.616255', 'step': 2312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:14:52.648737', 'step': 2312, 'epoch': 1} {'type': 'loss', 'content': 0.14409154653549194, 'timestamp': '2025-09-30 22:14:52.655892', 'step': 2313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:52.694950', 'step': 2313, 'epoch': 1} {'type': 'loss', 'content': 0.12215302884578705, 'timestamp': '2025-09-30 22:14:52.701091', 'step': 2314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:52.735576', 'step': 2314, 'epoch': 1} {'type': 'loss', 'content': 0.12115397304296494, 'timestamp': '2025-09-30 22:14:52.738289', 'step': 2315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:14:52.769132', 'step': 2315, 'epoch': 1} {'type': 'loss', 'content': 0.10789027065038681, 'timestamp': '2025-09-30 22:14:52.793731', 'step': 2316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:52.826409', 'step': 2316, 'epoch': 1} {'type': 'loss', 'content': 0.12357912957668304, 'timestamp': '2025-09-30 22:14:52.828800', 'step': 2317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:52.859884', 'step': 2317, 'epoch': 1} {'type': 'loss', 'content': 0.11107781529426575, 'timestamp': '2025-09-30 22:14:52.861993', 'step': 2318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.895839', 'step': 2318, 'epoch': 1} {'type': 'loss', 'content': 0.1962171196937561, 'timestamp': '2025-09-30 22:14:52.897931', 'step': 2319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:14:52.928216', 'step': 2319, 'epoch': 1} {'type': 'loss', 'content': 0.14007744193077087, 'timestamp': '2025-09-30 22:14:52.953860', 'step': 2320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:52.984347', 'step': 2320, 'epoch': 1} {'type': 'loss', 'content': 0.24991098046302795, 'timestamp': '2025-09-30 22:14:52.986436', 'step': 2321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:53.016755', 'step': 2321, 'epoch': 1} {'type': 'loss', 'content': 0.1981104612350464, 'timestamp': '2025-09-30 22:14:53.018925', 'step': 2322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:53.049053', 'step': 2322, 'epoch': 1} {'type': 'loss', 'content': 0.16611239314079285, 'timestamp': '2025-09-30 22:14:53.050989', 'step': 2323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:53.080862', 'step': 2323, 'epoch': 1} {'type': 'loss', 'content': 0.21887223422527313, 'timestamp': '2025-09-30 22:14:53.104782', 'step': 2324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:53.135140', 'step': 2324, 'epoch': 1} {'type': 'loss', 'content': 0.23425346612930298, 'timestamp': '2025-09-30 22:14:53.137258', 'step': 2325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:53.167270', 'step': 2325, 'epoch': 1} {'type': 'loss', 'content': 0.12277828902006149, 'timestamp': '2025-09-30 22:14:53.169444', 'step': 2326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:53.199963', 'step': 2326, 'epoch': 1} {'type': 'loss', 'content': 0.18251530826091766, 'timestamp': '2025-09-30 22:14:53.202284', 'step': 2327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:14:53.232115', 'step': 2327, 'epoch': 1} {'type': 'loss', 'content': 0.12189308553934097, 'timestamp': '2025-09-30 22:14:53.255844', 'step': 2328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:14:53.285932', 'step': 2328, 'epoch': 1} {'type': 'loss', 'content': 0.1450241208076477, 'timestamp': '2025-09-30 22:14:53.288128', 'step': 2329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:14:53.317693', 'step': 2329, 'epoch': 1} {'type': 'loss', 'content': 0.14140662550926208, 'timestamp': '2025-09-30 22:14:53.320028', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:15:01.800307', 'step': 2330, 'epoch': 1} {'type': 'pplx', 'content': 8052.03345005221, 'timestamp': '2025-09-30 22:15:01.803753', 'step': 2330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:01.833802', 'step': 2330, 'epoch': 1} {'type': 'loss', 'content': 0.17043255269527435, 'timestamp': '2025-09-30 22:15:01.842707', 'step': 2331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:01.874767', 'step': 2331, 'epoch': 1} {'type': 'loss', 'content': 0.22632601857185364, 'timestamp': '2025-09-30 22:15:01.899755', 'step': 2332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:01.940099', 'step': 2332, 'epoch': 1} {'type': 'loss', 'content': 0.137651264667511, 'timestamp': '2025-09-30 22:15:01.944033', 'step': 2333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:01.976123', 'step': 2333, 'epoch': 1} {'type': 'loss', 'content': 0.1810152232646942, 'timestamp': '2025-09-30 22:15:01.979816', 'step': 2334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.012075', 'step': 2334, 'epoch': 1} {'type': 'loss', 'content': 0.14546209573745728, 'timestamp': '2025-09-30 22:15:02.014987', 'step': 2335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:02.046247', 'step': 2335, 'epoch': 1} {'type': 'loss', 'content': 0.17430917918682098, 'timestamp': '2025-09-30 22:15:02.070837', 'step': 2336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:02.102399', 'step': 2336, 'epoch': 1} {'type': 'loss', 'content': 0.17695656418800354, 'timestamp': '2025-09-30 22:15:02.104759', 'step': 2337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:02.143691', 'step': 2337, 'epoch': 1} {'type': 'loss', 'content': 0.10627778619527817, 'timestamp': '2025-09-30 22:15:02.148017', 'step': 2338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:02.180450', 'step': 2338, 'epoch': 1} {'type': 'loss', 'content': 0.12272730469703674, 'timestamp': '2025-09-30 22:15:02.183236', 'step': 2339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.213576', 'step': 2339, 'epoch': 1} {'type': 'loss', 'content': 0.19652597606182098, 'timestamp': '2025-09-30 22:15:02.238137', 'step': 2340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:02.269255', 'step': 2340, 'epoch': 1} {'type': 'loss', 'content': 0.19865751266479492, 'timestamp': '2025-09-30 22:15:02.271381', 'step': 2341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.312011', 'step': 2341, 'epoch': 1} {'type': 'loss', 'content': 0.14496412873268127, 'timestamp': '2025-09-30 22:15:02.314766', 'step': 2342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.346490', 'step': 2342, 'epoch': 1} {'type': 'loss', 'content': 0.1746627390384674, 'timestamp': '2025-09-30 22:15:02.350294', 'step': 2343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:02.380545', 'step': 2343, 'epoch': 1} {'type': 'loss', 'content': 0.1439034342765808, 'timestamp': '2025-09-30 22:15:02.408898', 'step': 2344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.439204', 'step': 2344, 'epoch': 1} {'type': 'loss', 'content': 0.2070564180612564, 'timestamp': '2025-09-30 22:15:02.441771', 'step': 2345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:02.474784', 'step': 2345, 'epoch': 1} {'type': 'loss', 'content': 0.1557970494031906, 'timestamp': '2025-09-30 22:15:02.478284', 'step': 2346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.508809', 'step': 2346, 'epoch': 1} {'type': 'loss', 'content': 0.14965927600860596, 'timestamp': '2025-09-30 22:15:02.511334', 'step': 2347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.544279', 'step': 2347, 'epoch': 1} {'type': 'loss', 'content': 0.1531575471162796, 'timestamp': '2025-09-30 22:15:02.568534', 'step': 2348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:02.601883', 'step': 2348, 'epoch': 1} {'type': 'loss', 'content': 0.18457943201065063, 'timestamp': '2025-09-30 22:15:02.605078', 'step': 2349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:02.636493', 'step': 2349, 'epoch': 1} {'type': 'loss', 'content': 0.15323105454444885, 'timestamp': '2025-09-30 22:15:02.639099', 'step': 2350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:02.669933', 'step': 2350, 'epoch': 1} {'type': 'loss', 'content': 0.2564782500267029, 'timestamp': '2025-09-30 22:15:02.672052', 'step': 2351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:02.702483', 'step': 2351, 'epoch': 1} {'type': 'loss', 'content': 0.07210040092468262, 'timestamp': '2025-09-30 22:15:02.727794', 'step': 2352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:02.760559', 'step': 2352, 'epoch': 1} {'type': 'loss', 'content': 0.12593084573745728, 'timestamp': '2025-09-30 22:15:02.763825', 'step': 2353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:02.799131', 'step': 2353, 'epoch': 1} {'type': 'loss', 'content': 0.0834367647767067, 'timestamp': '2025-09-30 22:15:02.802241', 'step': 2354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.838315', 'step': 2354, 'epoch': 1} {'type': 'loss', 'content': 0.2374909371137619, 'timestamp': '2025-09-30 22:15:02.840339', 'step': 2355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.873507', 'step': 2355, 'epoch': 1} {'type': 'loss', 'content': 0.15738871693611145, 'timestamp': '2025-09-30 22:15:02.897332', 'step': 2356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:02.931593', 'step': 2356, 'epoch': 1} {'type': 'loss', 'content': 0.16256950795650482, 'timestamp': '2025-09-30 22:15:02.934266', 'step': 2357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:02.965190', 'step': 2357, 'epoch': 1} {'type': 'loss', 'content': 0.12394386529922485, 'timestamp': '2025-09-30 22:15:02.970737', 'step': 2358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:03.004822', 'step': 2358, 'epoch': 1} {'type': 'loss', 'content': 0.18415656685829163, 'timestamp': '2025-09-30 22:15:03.008778', 'step': 2359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.041593', 'step': 2359, 'epoch': 1} {'type': 'loss', 'content': 0.20952630043029785, 'timestamp': '2025-09-30 22:15:03.065732', 'step': 2360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.107134', 'step': 2360, 'epoch': 1} {'type': 'loss', 'content': 0.2581378221511841, 'timestamp': '2025-09-30 22:15:03.112693', 'step': 2361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.144805', 'step': 2361, 'epoch': 1} {'type': 'loss', 'content': 0.19247564673423767, 'timestamp': '2025-09-30 22:15:03.147371', 'step': 2362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:03.181255', 'step': 2362, 'epoch': 1} {'type': 'loss', 'content': 0.1451745182275772, 'timestamp': '2025-09-30 22:15:03.183371', 'step': 2363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:03.215711', 'step': 2363, 'epoch': 1} {'type': 'loss', 'content': 0.3604291081428528, 'timestamp': '2025-09-30 22:15:03.241170', 'step': 2364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.273101', 'step': 2364, 'epoch': 1} {'type': 'loss', 'content': 0.21693502366542816, 'timestamp': '2025-09-30 22:15:03.277605', 'step': 2365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:03.309673', 'step': 2365, 'epoch': 1} {'type': 'loss', 'content': 0.2694041132926941, 'timestamp': '2025-09-30 22:15:03.312254', 'step': 2366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:03.346526', 'step': 2366, 'epoch': 1} {'type': 'loss', 'content': 0.1508629322052002, 'timestamp': '2025-09-30 22:15:03.351520', 'step': 2367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.389045', 'step': 2367, 'epoch': 1} {'type': 'loss', 'content': 0.20565664768218994, 'timestamp': '2025-09-30 22:15:03.418268', 'step': 2368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.451374', 'step': 2368, 'epoch': 1} {'type': 'loss', 'content': 0.24871887266635895, 'timestamp': '2025-09-30 22:15:03.454801', 'step': 2369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:03.496673', 'step': 2369, 'epoch': 1} {'type': 'loss', 'content': 0.15665598213672638, 'timestamp': '2025-09-30 22:15:03.500550', 'step': 2370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.537383', 'step': 2370, 'epoch': 1} {'type': 'loss', 'content': 0.15164920687675476, 'timestamp': '2025-09-30 22:15:03.542560', 'step': 2371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.578557', 'step': 2371, 'epoch': 1} {'type': 'loss', 'content': 0.12850894033908844, 'timestamp': '2025-09-30 22:15:03.603070', 'step': 2372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.633415', 'step': 2372, 'epoch': 1} {'type': 'loss', 'content': 0.20533707737922668, 'timestamp': '2025-09-30 22:15:03.635597', 'step': 2373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.667535', 'step': 2373, 'epoch': 1} {'type': 'loss', 'content': 0.21682168543338776, 'timestamp': '2025-09-30 22:15:03.676256', 'step': 2374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.707579', 'step': 2374, 'epoch': 1} {'type': 'loss', 'content': 0.23898452520370483, 'timestamp': '2025-09-30 22:15:03.718233', 'step': 2375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:03.752601', 'step': 2375, 'epoch': 1} {'type': 'loss', 'content': 0.18200986087322235, 'timestamp': '2025-09-30 22:15:03.777423', 'step': 2376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.813727', 'step': 2376, 'epoch': 1} {'type': 'loss', 'content': 0.27965807914733887, 'timestamp': '2025-09-30 22:15:03.816627', 'step': 2377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.849769', 'step': 2377, 'epoch': 1} {'type': 'loss', 'content': 0.23605836927890778, 'timestamp': '2025-09-30 22:15:03.851847', 'step': 2378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.883100', 'step': 2378, 'epoch': 1} {'type': 'loss', 'content': 0.14286212623119354, 'timestamp': '2025-09-30 22:15:03.885646', 'step': 2379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:03.917716', 'step': 2379, 'epoch': 1} {'type': 'loss', 'content': 0.14693084359169006, 'timestamp': '2025-09-30 22:15:03.942234', 'step': 2380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:03.973386', 'step': 2380, 'epoch': 1} {'type': 'loss', 'content': 0.11217545717954636, 'timestamp': '2025-09-30 22:15:03.976527', 'step': 2381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:04.010280', 'step': 2381, 'epoch': 1} {'type': 'loss', 'content': 0.21781258285045624, 'timestamp': '2025-09-30 22:15:04.012660', 'step': 2382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.042951', 'step': 2382, 'epoch': 1} {'type': 'loss', 'content': 0.19165381789207458, 'timestamp': '2025-09-30 22:15:04.045508', 'step': 2383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:04.075368', 'step': 2383, 'epoch': 1} {'type': 'loss', 'content': 0.29316678643226624, 'timestamp': '2025-09-30 22:15:04.099397', 'step': 2384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:04.136671', 'step': 2384, 'epoch': 1} {'type': 'loss', 'content': 0.20287515223026276, 'timestamp': '2025-09-30 22:15:04.140866', 'step': 2385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:04.172519', 'step': 2385, 'epoch': 1} {'type': 'loss', 'content': 0.2158275842666626, 'timestamp': '2025-09-30 22:15:04.174950', 'step': 2386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.204750', 'step': 2386, 'epoch': 1} {'type': 'loss', 'content': 0.17483577132225037, 'timestamp': '2025-09-30 22:15:04.207314', 'step': 2387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:04.240020', 'step': 2387, 'epoch': 1} {'type': 'loss', 'content': 0.18265406787395477, 'timestamp': '2025-09-30 22:15:04.264027', 'step': 2388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.295162', 'step': 2388, 'epoch': 1} {'type': 'loss', 'content': 0.1925816535949707, 'timestamp': '2025-09-30 22:15:04.302344', 'step': 2389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.334594', 'step': 2389, 'epoch': 1} {'type': 'loss', 'content': 0.2135317325592041, 'timestamp': '2025-09-30 22:15:04.337846', 'step': 2390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:04.369091', 'step': 2390, 'epoch': 1} {'type': 'loss', 'content': 0.2708750367164612, 'timestamp': '2025-09-30 22:15:04.373497', 'step': 2391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.406277', 'step': 2391, 'epoch': 1} {'type': 'loss', 'content': 0.21097564697265625, 'timestamp': '2025-09-30 22:15:04.430305', 'step': 2392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.461578', 'step': 2392, 'epoch': 1} {'type': 'loss', 'content': 0.24653300642967224, 'timestamp': '2025-09-30 22:15:04.464612', 'step': 2393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.496070', 'step': 2393, 'epoch': 1} {'type': 'loss', 'content': 0.1163799911737442, 'timestamp': '2025-09-30 22:15:04.499023', 'step': 2394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.538904', 'step': 2394, 'epoch': 1} {'type': 'loss', 'content': 0.2002795934677124, 'timestamp': '2025-09-30 22:15:04.543479', 'step': 2395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.575847', 'step': 2395, 'epoch': 1} {'type': 'loss', 'content': 0.21010348200798035, 'timestamp': '2025-09-30 22:15:04.600428', 'step': 2396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.632006', 'step': 2396, 'epoch': 1} {'type': 'loss', 'content': 0.11746878921985626, 'timestamp': '2025-09-30 22:15:04.634999', 'step': 2397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:04.664857', 'step': 2397, 'epoch': 1} {'type': 'loss', 'content': 0.13319210708141327, 'timestamp': '2025-09-30 22:15:04.667230', 'step': 2398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.699051', 'step': 2398, 'epoch': 1} {'type': 'loss', 'content': 0.1864929050207138, 'timestamp': '2025-09-30 22:15:04.701438', 'step': 2399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:04.731154', 'step': 2399, 'epoch': 1} {'type': 'loss', 'content': 0.16559074819087982, 'timestamp': '2025-09-30 22:15:04.754787', 'step': 2400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.786876', 'step': 2400, 'epoch': 1} {'type': 'loss', 'content': 0.17468701303005219, 'timestamp': '2025-09-30 22:15:04.791481', 'step': 2401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.823097', 'step': 2401, 'epoch': 1} {'type': 'loss', 'content': 0.13519950211048126, 'timestamp': '2025-09-30 22:15:04.825444', 'step': 2402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.856471', 'step': 2402, 'epoch': 1} {'type': 'loss', 'content': 0.18756867945194244, 'timestamp': '2025-09-30 22:15:04.861303', 'step': 2403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.893425', 'step': 2403, 'epoch': 1} {'type': 'loss', 'content': 0.12654319405555725, 'timestamp': '2025-09-30 22:15:04.917484', 'step': 2404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:04.947251', 'step': 2404, 'epoch': 1} {'type': 'loss', 'content': 0.13107647001743317, 'timestamp': '2025-09-30 22:15:04.951344', 'step': 2405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:04.984953', 'step': 2405, 'epoch': 1} {'type': 'loss', 'content': 0.16868318617343903, 'timestamp': '2025-09-30 22:15:04.987433', 'step': 2406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.022796', 'step': 2406, 'epoch': 1} {'type': 'loss', 'content': 0.1637306958436966, 'timestamp': '2025-09-30 22:15:05.034265', 'step': 2407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:05.069448', 'step': 2407, 'epoch': 1} {'type': 'loss', 'content': 0.15542539954185486, 'timestamp': '2025-09-30 22:15:05.093369', 'step': 2408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:05.122955', 'step': 2408, 'epoch': 1} {'type': 'loss', 'content': 0.21218141913414001, 'timestamp': '2025-09-30 22:15:05.125086', 'step': 2409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:05.159254', 'step': 2409, 'epoch': 1} {'type': 'loss', 'content': 0.2434595823287964, 'timestamp': '2025-09-30 22:15:05.161702', 'step': 2410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:05.198664', 'step': 2410, 'epoch': 1} {'type': 'loss', 'content': 0.2986154556274414, 'timestamp': '2025-09-30 22:15:05.203323', 'step': 2411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:05.233285', 'step': 2411, 'epoch': 1} {'type': 'loss', 'content': 0.2320234477519989, 'timestamp': '2025-09-30 22:15:05.258971', 'step': 2412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.294328', 'step': 2412, 'epoch': 1} {'type': 'loss', 'content': 0.20965951681137085, 'timestamp': '2025-09-30 22:15:05.296347', 'step': 2413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:05.328189', 'step': 2413, 'epoch': 1} {'type': 'loss', 'content': 0.20373988151550293, 'timestamp': '2025-09-30 22:15:05.337894', 'step': 2414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:05.374379', 'step': 2414, 'epoch': 1} {'type': 'loss', 'content': 0.2546688914299011, 'timestamp': '2025-09-30 22:15:05.377131', 'step': 2415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:05.413349', 'step': 2415, 'epoch': 1} {'type': 'loss', 'content': 0.2596491873264313, 'timestamp': '2025-09-30 22:15:05.439230', 'step': 2416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:05.468363', 'step': 2416, 'epoch': 1} {'type': 'loss', 'content': 0.1569444090127945, 'timestamp': '2025-09-30 22:15:05.470492', 'step': 2417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:05.504346', 'step': 2417, 'epoch': 1} {'type': 'loss', 'content': 0.27781352400779724, 'timestamp': '2025-09-30 22:15:05.507026', 'step': 2418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.541337', 'step': 2418, 'epoch': 1} {'type': 'loss', 'content': 0.16568468511104584, 'timestamp': '2025-09-30 22:15:05.544849', 'step': 2419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.580544', 'step': 2419, 'epoch': 1} {'type': 'loss', 'content': 0.1723802089691162, 'timestamp': '2025-09-30 22:15:05.604715', 'step': 2420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:05.641547', 'step': 2420, 'epoch': 1} {'type': 'loss', 'content': 0.167421355843544, 'timestamp': '2025-09-30 22:15:05.645458', 'step': 2421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:05.676864', 'step': 2421, 'epoch': 1} {'type': 'loss', 'content': 0.24825777113437653, 'timestamp': '2025-09-30 22:15:05.680673', 'step': 2422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:05.715506', 'step': 2422, 'epoch': 1} {'type': 'loss', 'content': 0.20354630053043365, 'timestamp': '2025-09-30 22:15:05.717924', 'step': 2423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:05.748272', 'step': 2423, 'epoch': 1} {'type': 'loss', 'content': 0.12855881452560425, 'timestamp': '2025-09-30 22:15:05.772056', 'step': 2424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.803777', 'step': 2424, 'epoch': 1} {'type': 'loss', 'content': 0.18143804371356964, 'timestamp': '2025-09-30 22:15:05.811782', 'step': 2425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.842532', 'step': 2425, 'epoch': 1} {'type': 'loss', 'content': 0.17697030305862427, 'timestamp': '2025-09-30 22:15:05.845877', 'step': 2426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.877774', 'step': 2426, 'epoch': 1} {'type': 'loss', 'content': 0.14878280460834503, 'timestamp': '2025-09-30 22:15:05.881048', 'step': 2427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:05.913204', 'step': 2427, 'epoch': 1} {'type': 'loss', 'content': 0.2570720613002777, 'timestamp': '2025-09-30 22:15:05.940850', 'step': 2428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:05.978232', 'step': 2428, 'epoch': 1} {'type': 'loss', 'content': 0.1264123022556305, 'timestamp': '2025-09-30 22:15:05.980282', 'step': 2429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:06.011026', 'step': 2429, 'epoch': 1} {'type': 'loss', 'content': 0.12521471083164215, 'timestamp': '2025-09-30 22:15:06.014334', 'step': 2430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.045114', 'step': 2430, 'epoch': 1} {'type': 'loss', 'content': 0.20506106317043304, 'timestamp': '2025-09-30 22:15:06.048541', 'step': 2431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.080008', 'step': 2431, 'epoch': 1} {'type': 'loss', 'content': 0.17456136643886566, 'timestamp': '2025-09-30 22:15:06.103810', 'step': 2432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.135473', 'step': 2432, 'epoch': 1} {'type': 'loss', 'content': 0.1819610297679901, 'timestamp': '2025-09-30 22:15:06.139611', 'step': 2433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:06.171608', 'step': 2433, 'epoch': 1} {'type': 'loss', 'content': 0.12617170810699463, 'timestamp': '2025-09-30 22:15:06.174921', 'step': 2434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.206596', 'step': 2434, 'epoch': 1} {'type': 'loss', 'content': 0.24092210829257965, 'timestamp': '2025-09-30 22:15:06.208960', 'step': 2435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.239629', 'step': 2435, 'epoch': 1} {'type': 'loss', 'content': 0.1151597797870636, 'timestamp': '2025-09-30 22:15:06.264185', 'step': 2436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.293904', 'step': 2436, 'epoch': 1} {'type': 'loss', 'content': 0.13534845411777496, 'timestamp': '2025-09-30 22:15:06.296698', 'step': 2437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.327213', 'step': 2437, 'epoch': 1} {'type': 'loss', 'content': 0.15053807199001312, 'timestamp': '2025-09-30 22:15:06.330392', 'step': 2438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.363192', 'step': 2438, 'epoch': 1} {'type': 'loss', 'content': 0.16322995722293854, 'timestamp': '2025-09-30 22:15:06.365265', 'step': 2439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.397801', 'step': 2439, 'epoch': 1} {'type': 'loss', 'content': 0.17652562260627747, 'timestamp': '2025-09-30 22:15:06.421786', 'step': 2440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.452304', 'step': 2440, 'epoch': 1} {'type': 'loss', 'content': 0.30395451188087463, 'timestamp': '2025-09-30 22:15:06.461601', 'step': 2441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.494553', 'step': 2441, 'epoch': 1} {'type': 'loss', 'content': 0.21025770902633667, 'timestamp': '2025-09-30 22:15:06.496801', 'step': 2442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:06.526813', 'step': 2442, 'epoch': 1} {'type': 'loss', 'content': 0.19648000597953796, 'timestamp': '2025-09-30 22:15:06.528729', 'step': 2443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.559096', 'step': 2443, 'epoch': 1} {'type': 'loss', 'content': 0.22732162475585938, 'timestamp': '2025-09-30 22:15:06.589840', 'step': 2444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:06.619397', 'step': 2444, 'epoch': 1} {'type': 'loss', 'content': 0.07503809779882431, 'timestamp': '2025-09-30 22:15:06.622012', 'step': 2445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.653545', 'step': 2445, 'epoch': 1} {'type': 'loss', 'content': 0.17116987705230713, 'timestamp': '2025-09-30 22:15:06.655643', 'step': 2446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:06.688664', 'step': 2446, 'epoch': 1} {'type': 'loss', 'content': 0.12849050760269165, 'timestamp': '2025-09-30 22:15:06.693310', 'step': 2447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.724901', 'step': 2447, 'epoch': 1} {'type': 'loss', 'content': 0.17692305147647858, 'timestamp': '2025-09-30 22:15:06.748827', 'step': 2448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.782310', 'step': 2448, 'epoch': 1} {'type': 'loss', 'content': 0.2045384645462036, 'timestamp': '2025-09-30 22:15:06.785740', 'step': 2449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:06.818405', 'step': 2449, 'epoch': 1} {'type': 'loss', 'content': 0.17262034118175507, 'timestamp': '2025-09-30 22:15:06.820525', 'step': 2450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.852952', 'step': 2450, 'epoch': 1} {'type': 'loss', 'content': 0.12031494081020355, 'timestamp': '2025-09-30 22:15:06.854995', 'step': 2451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.889040', 'step': 2451, 'epoch': 1} {'type': 'loss', 'content': 0.23789069056510925, 'timestamp': '2025-09-30 22:15:06.914269', 'step': 2452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:06.949536', 'step': 2452, 'epoch': 1} {'type': 'loss', 'content': 0.15853558480739594, 'timestamp': '2025-09-30 22:15:06.954380', 'step': 2453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:06.984812', 'step': 2453, 'epoch': 1} {'type': 'loss', 'content': 0.14548254013061523, 'timestamp': '2025-09-30 22:15:06.989045', 'step': 2454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.029159', 'step': 2454, 'epoch': 1} {'type': 'loss', 'content': 0.15393412113189697, 'timestamp': '2025-09-30 22:15:07.031604', 'step': 2455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:07.064593', 'step': 2455, 'epoch': 1} {'type': 'loss', 'content': 0.17091886699199677, 'timestamp': '2025-09-30 22:15:07.088854', 'step': 2456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.124135', 'step': 2456, 'epoch': 1} {'type': 'loss', 'content': 0.17426499724388123, 'timestamp': '2025-09-30 22:15:07.128145', 'step': 2457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.159350', 'step': 2457, 'epoch': 1} {'type': 'loss', 'content': 0.15641659498214722, 'timestamp': '2025-09-30 22:15:07.171592', 'step': 2458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.204400', 'step': 2458, 'epoch': 1} {'type': 'loss', 'content': 0.24107235670089722, 'timestamp': '2025-09-30 22:15:07.206652', 'step': 2459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.238232', 'step': 2459, 'epoch': 1} {'type': 'loss', 'content': 0.15358413755893707, 'timestamp': '2025-09-30 22:15:07.263784', 'step': 2460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.295438', 'step': 2460, 'epoch': 1} {'type': 'loss', 'content': 0.18601737916469574, 'timestamp': '2025-09-30 22:15:07.301273', 'step': 2461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:07.336002', 'step': 2461, 'epoch': 1} {'type': 'loss', 'content': 0.18259619176387787, 'timestamp': '2025-09-30 22:15:07.339126', 'step': 2462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:07.369578', 'step': 2462, 'epoch': 1} {'type': 'loss', 'content': 0.26059773564338684, 'timestamp': '2025-09-30 22:15:07.372585', 'step': 2463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:07.402948', 'step': 2463, 'epoch': 1} {'type': 'loss', 'content': 0.2160465568304062, 'timestamp': '2025-09-30 22:15:07.433484', 'step': 2464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:07.466288', 'step': 2464, 'epoch': 1} {'type': 'loss', 'content': 0.23637355864048004, 'timestamp': '2025-09-30 22:15:07.469001', 'step': 2465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:07.499267', 'step': 2465, 'epoch': 1} {'type': 'loss', 'content': 0.18358612060546875, 'timestamp': '2025-09-30 22:15:07.501744', 'step': 2466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.533160', 'step': 2466, 'epoch': 1} {'type': 'loss', 'content': 0.12155307084321976, 'timestamp': '2025-09-30 22:15:07.535584', 'step': 2467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:07.567101', 'step': 2467, 'epoch': 1} {'type': 'loss', 'content': 0.20048996806144714, 'timestamp': '2025-09-30 22:15:07.591043', 'step': 2468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:07.625351', 'step': 2468, 'epoch': 1} {'type': 'loss', 'content': 0.1184520423412323, 'timestamp': '2025-09-30 22:15:07.628136', 'step': 2469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:07.666903', 'step': 2469, 'epoch': 1} {'type': 'loss', 'content': 0.3215160071849823, 'timestamp': '2025-09-30 22:15:07.670466', 'step': 2470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.700507', 'step': 2470, 'epoch': 1} {'type': 'loss', 'content': 0.1411779522895813, 'timestamp': '2025-09-30 22:15:07.704755', 'step': 2471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:07.736620', 'step': 2471, 'epoch': 1} {'type': 'loss', 'content': 0.16789871454238892, 'timestamp': '2025-09-30 22:15:07.761031', 'step': 2472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.795279', 'step': 2472, 'epoch': 1} {'type': 'loss', 'content': 0.19093309342861176, 'timestamp': '2025-09-30 22:15:07.797976', 'step': 2473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:07.829899', 'step': 2473, 'epoch': 1} {'type': 'loss', 'content': 0.20462442934513092, 'timestamp': '2025-09-30 22:15:07.832559', 'step': 2474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:07.862901', 'step': 2474, 'epoch': 1} {'type': 'loss', 'content': 0.10849784314632416, 'timestamp': '2025-09-30 22:15:07.869605', 'step': 2475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:07.900872', 'step': 2475, 'epoch': 1} {'type': 'loss', 'content': 0.18311817944049835, 'timestamp': '2025-09-30 22:15:07.924638', 'step': 2476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:07.956547', 'step': 2476, 'epoch': 1} {'type': 'loss', 'content': 0.2418837994337082, 'timestamp': '2025-09-30 22:15:07.961542', 'step': 2477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:07.992612', 'step': 2477, 'epoch': 1} {'type': 'loss', 'content': 0.08602361381053925, 'timestamp': '2025-09-30 22:15:07.995679', 'step': 2478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.028093', 'step': 2478, 'epoch': 1} {'type': 'loss', 'content': 0.16489575803279877, 'timestamp': '2025-09-30 22:15:08.032366', 'step': 2479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:08.066496', 'step': 2479, 'epoch': 1} {'type': 'loss', 'content': 0.13294677436351776, 'timestamp': '2025-09-30 22:15:08.090216', 'step': 2480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:08.123486', 'step': 2480, 'epoch': 1} {'type': 'loss', 'content': 0.2638319432735443, 'timestamp': '2025-09-30 22:15:08.125892', 'step': 2481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:08.156212', 'step': 2481, 'epoch': 1} {'type': 'loss', 'content': 0.1451783925294876, 'timestamp': '2025-09-30 22:15:08.158516', 'step': 2482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:08.189004', 'step': 2482, 'epoch': 1} {'type': 'loss', 'content': 0.2246534675359726, 'timestamp': '2025-09-30 22:15:08.193558', 'step': 2483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.226758', 'step': 2483, 'epoch': 1} {'type': 'loss', 'content': 0.1886189877986908, 'timestamp': '2025-09-30 22:15:08.252303', 'step': 2484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:08.282561', 'step': 2484, 'epoch': 1} {'type': 'loss', 'content': 0.23227162659168243, 'timestamp': '2025-09-30 22:15:08.288070', 'step': 2485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:08.320551', 'step': 2485, 'epoch': 1} {'type': 'loss', 'content': 0.23987360298633575, 'timestamp': '2025-09-30 22:15:08.322647', 'step': 2486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:08.361091', 'step': 2486, 'epoch': 1} {'type': 'loss', 'content': 0.12282267212867737, 'timestamp': '2025-09-30 22:15:08.365559', 'step': 2487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:08.398025', 'step': 2487, 'epoch': 1} {'type': 'loss', 'content': 0.2867606282234192, 'timestamp': '2025-09-30 22:15:08.422442', 'step': 2488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.452111', 'step': 2488, 'epoch': 1} {'type': 'loss', 'content': 0.18896768987178802, 'timestamp': '2025-09-30 22:15:08.460118', 'step': 2489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:08.493552', 'step': 2489, 'epoch': 1} {'type': 'loss', 'content': 0.14965973794460297, 'timestamp': '2025-09-30 22:15:08.498566', 'step': 2490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:08.532511', 'step': 2490, 'epoch': 1} {'type': 'loss', 'content': 0.2073834389448166, 'timestamp': '2025-09-30 22:15:08.537742', 'step': 2491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:08.568613', 'step': 2491, 'epoch': 1} {'type': 'loss', 'content': 0.1507977694272995, 'timestamp': '2025-09-30 22:15:08.594243', 'step': 2492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:08.627386', 'step': 2492, 'epoch': 1} {'type': 'loss', 'content': 0.13317671418190002, 'timestamp': '2025-09-30 22:15:08.630815', 'step': 2493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.661329', 'step': 2493, 'epoch': 1} {'type': 'loss', 'content': 0.29560738801956177, 'timestamp': '2025-09-30 22:15:08.672571', 'step': 2494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.704092', 'step': 2494, 'epoch': 1} {'type': 'loss', 'content': 0.2575916647911072, 'timestamp': '2025-09-30 22:15:08.706273', 'step': 2495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:08.737642', 'step': 2495, 'epoch': 1} {'type': 'loss', 'content': 0.1426602303981781, 'timestamp': '2025-09-30 22:15:08.761566', 'step': 2496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.802945', 'step': 2496, 'epoch': 1} {'type': 'loss', 'content': 0.13083547353744507, 'timestamp': '2025-09-30 22:15:08.805430', 'step': 2497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:08.842716', 'step': 2497, 'epoch': 1} {'type': 'loss', 'content': 0.19983011484146118, 'timestamp': '2025-09-30 22:15:08.845570', 'step': 2498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:08.882679', 'step': 2498, 'epoch': 1} {'type': 'loss', 'content': 0.11450948566198349, 'timestamp': '2025-09-30 22:15:08.889722', 'step': 2499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:08.924342', 'step': 2499, 'epoch': 1} {'type': 'loss', 'content': 0.19948463141918182, 'timestamp': '2025-09-30 22:15:08.951356', 'step': 2500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 2500', 'timestamp': '2025-09-30 22:15:13.897840', 'step': 2500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:13.940638', 'step': 2500, 'epoch': 1} {'type': 'loss', 'content': 0.17636637389659882, 'timestamp': '2025-09-30 22:15:13.942904', 'step': 2501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:13.976378', 'step': 2501, 'epoch': 1} {'type': 'loss', 'content': 0.2002297341823578, 'timestamp': '2025-09-30 22:15:13.979302', 'step': 2502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.009428', 'step': 2502, 'epoch': 1} {'type': 'loss', 'content': 0.1107046902179718, 'timestamp': '2025-09-30 22:15:14.012064', 'step': 2503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:14.045327', 'step': 2503, 'epoch': 1} {'type': 'loss', 'content': 0.18308766186237335, 'timestamp': '2025-09-30 22:15:14.074863', 'step': 2504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:14.107918', 'step': 2504, 'epoch': 1} {'type': 'loss', 'content': 0.21760433912277222, 'timestamp': '2025-09-30 22:15:14.110549', 'step': 2505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.143663', 'step': 2505, 'epoch': 1} {'type': 'loss', 'content': 0.1457223743200302, 'timestamp': '2025-09-30 22:15:14.146979', 'step': 2506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.180177', 'step': 2506, 'epoch': 1} {'type': 'loss', 'content': 0.15596914291381836, 'timestamp': '2025-09-30 22:15:14.182896', 'step': 2507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.214509', 'step': 2507, 'epoch': 1} {'type': 'loss', 'content': 0.29341062903404236, 'timestamp': '2025-09-30 22:15:14.244186', 'step': 2508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:14.275402', 'step': 2508, 'epoch': 1} {'type': 'loss', 'content': 0.10742542147636414, 'timestamp': '2025-09-30 22:15:14.278878', 'step': 2509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.309507', 'step': 2509, 'epoch': 1} {'type': 'loss', 'content': 0.15067248046398163, 'timestamp': '2025-09-30 22:15:14.319520', 'step': 2510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.351794', 'step': 2510, 'epoch': 1} {'type': 'loss', 'content': 0.18295973539352417, 'timestamp': '2025-09-30 22:15:14.354793', 'step': 2511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:14.385668', 'step': 2511, 'epoch': 1} {'type': 'loss', 'content': 0.16275952756404877, 'timestamp': '2025-09-30 22:15:14.411419', 'step': 2512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.441777', 'step': 2512, 'epoch': 1} {'type': 'loss', 'content': 0.29999083280563354, 'timestamp': '2025-09-30 22:15:14.445767', 'step': 2513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.477778', 'step': 2513, 'epoch': 1} {'type': 'loss', 'content': 0.15425553917884827, 'timestamp': '2025-09-30 22:15:14.480271', 'step': 2514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.522276', 'step': 2514, 'epoch': 1} {'type': 'loss', 'content': 0.17173735797405243, 'timestamp': '2025-09-30 22:15:14.529267', 'step': 2515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.564964', 'step': 2515, 'epoch': 1} {'type': 'loss', 'content': 0.20578408241271973, 'timestamp': '2025-09-30 22:15:14.592753', 'step': 2516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.626867', 'step': 2516, 'epoch': 1} {'type': 'loss', 'content': 0.2075951099395752, 'timestamp': '2025-09-30 22:15:14.630023', 'step': 2517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.660812', 'step': 2517, 'epoch': 1} {'type': 'loss', 'content': 0.11461330950260162, 'timestamp': '2025-09-30 22:15:14.663746', 'step': 2518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:14.697171', 'step': 2518, 'epoch': 1} {'type': 'loss', 'content': 0.1520071029663086, 'timestamp': '2025-09-30 22:15:14.700965', 'step': 2519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.732477', 'step': 2519, 'epoch': 1} {'type': 'loss', 'content': 0.16452902555465698, 'timestamp': '2025-09-30 22:15:14.756901', 'step': 2520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:14.788311', 'step': 2520, 'epoch': 1} {'type': 'loss', 'content': 0.3131001889705658, 'timestamp': '2025-09-30 22:15:14.790706', 'step': 2521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.821300', 'step': 2521, 'epoch': 1} {'type': 'loss', 'content': 0.08409751206636429, 'timestamp': '2025-09-30 22:15:14.823848', 'step': 2522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:14.853769', 'step': 2522, 'epoch': 1} {'type': 'loss', 'content': 0.13164889812469482, 'timestamp': '2025-09-30 22:15:14.856290', 'step': 2523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.888857', 'step': 2523, 'epoch': 1} {'type': 'loss', 'content': 0.14327366650104523, 'timestamp': '2025-09-30 22:15:14.915755', 'step': 2524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:14.951930', 'step': 2524, 'epoch': 1} {'type': 'loss', 'content': 0.24280712008476257, 'timestamp': '2025-09-30 22:15:14.954122', 'step': 2525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:14.984037', 'step': 2525, 'epoch': 1} {'type': 'loss', 'content': 0.12395189702510834, 'timestamp': '2025-09-30 22:15:14.986255', 'step': 2526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:15.017544', 'step': 2526, 'epoch': 1} {'type': 'loss', 'content': 0.24883480370044708, 'timestamp': '2025-09-30 22:15:15.028388', 'step': 2527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.064807', 'step': 2527, 'epoch': 1} {'type': 'loss', 'content': 0.27122414112091064, 'timestamp': '2025-09-30 22:15:15.089631', 'step': 2528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:15.121686', 'step': 2528, 'epoch': 1} {'type': 'loss', 'content': 0.18714256584644318, 'timestamp': '2025-09-30 22:15:15.123863', 'step': 2529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.155260', 'step': 2529, 'epoch': 1} {'type': 'loss', 'content': 0.21174244582653046, 'timestamp': '2025-09-30 22:15:15.158438', 'step': 2530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:15.188710', 'step': 2530, 'epoch': 1} {'type': 'loss', 'content': 0.20135077834129333, 'timestamp': '2025-09-30 22:15:15.191105', 'step': 2531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.221273', 'step': 2531, 'epoch': 1} {'type': 'loss', 'content': 0.18362776935100555, 'timestamp': '2025-09-30 22:15:15.246249', 'step': 2532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.276389', 'step': 2532, 'epoch': 1} {'type': 'loss', 'content': 0.18893355131149292, 'timestamp': '2025-09-30 22:15:15.278591', 'step': 2533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.309081', 'step': 2533, 'epoch': 1} {'type': 'loss', 'content': 0.13566358387470245, 'timestamp': '2025-09-30 22:15:15.311664', 'step': 2534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.341932', 'step': 2534, 'epoch': 1} {'type': 'loss', 'content': 0.17233362793922424, 'timestamp': '2025-09-30 22:15:15.345569', 'step': 2535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.377184', 'step': 2535, 'epoch': 1} {'type': 'loss', 'content': 0.22294975817203522, 'timestamp': '2025-09-30 22:15:15.401687', 'step': 2536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.434482', 'step': 2536, 'epoch': 1} {'type': 'loss', 'content': 0.09538397192955017, 'timestamp': '2025-09-30 22:15:15.438726', 'step': 2537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.469086', 'step': 2537, 'epoch': 1} {'type': 'loss', 'content': 0.10630924254655838, 'timestamp': '2025-09-30 22:15:15.471693', 'step': 2538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.502521', 'step': 2538, 'epoch': 1} {'type': 'loss', 'content': 0.18515358865261078, 'timestamp': '2025-09-30 22:15:15.504887', 'step': 2539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.535076', 'step': 2539, 'epoch': 1} {'type': 'loss', 'content': 0.19331295788288116, 'timestamp': '2025-09-30 22:15:15.558882', 'step': 2540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:15.589339', 'step': 2540, 'epoch': 1} {'type': 'loss', 'content': 0.1989711970090866, 'timestamp': '2025-09-30 22:15:15.596344', 'step': 2541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.627275', 'step': 2541, 'epoch': 1} {'type': 'loss', 'content': 0.11013370752334595, 'timestamp': '2025-09-30 22:15:15.630064', 'step': 2542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.660265', 'step': 2542, 'epoch': 1} {'type': 'loss', 'content': 0.12153515964746475, 'timestamp': '2025-09-30 22:15:15.663345', 'step': 2543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.696098', 'step': 2543, 'epoch': 1} {'type': 'loss', 'content': 0.17753934860229492, 'timestamp': '2025-09-30 22:15:15.721846', 'step': 2544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.752104', 'step': 2544, 'epoch': 1} {'type': 'loss', 'content': 0.17542226612567902, 'timestamp': '2025-09-30 22:15:15.754523', 'step': 2545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:15.794121', 'step': 2545, 'epoch': 1} {'type': 'loss', 'content': 0.1517001837491989, 'timestamp': '2025-09-30 22:15:15.796417', 'step': 2546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:15.827899', 'step': 2546, 'epoch': 1} {'type': 'loss', 'content': 0.20128028094768524, 'timestamp': '2025-09-30 22:15:15.831661', 'step': 2547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:15.864030', 'step': 2547, 'epoch': 1} {'type': 'loss', 'content': 0.2547612488269806, 'timestamp': '2025-09-30 22:15:15.888146', 'step': 2548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:15.919274', 'step': 2548, 'epoch': 1} {'type': 'loss', 'content': 0.230888232588768, 'timestamp': '2025-09-30 22:15:15.925335', 'step': 2549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:15.964748', 'step': 2549, 'epoch': 1} {'type': 'loss', 'content': 0.11695428937673569, 'timestamp': '2025-09-30 22:15:15.967930', 'step': 2550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:15.998188', 'step': 2550, 'epoch': 1} {'type': 'loss', 'content': 0.2150387465953827, 'timestamp': '2025-09-30 22:15:16.000626', 'step': 2551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:16.031231', 'step': 2551, 'epoch': 1} {'type': 'loss', 'content': 0.11303716897964478, 'timestamp': '2025-09-30 22:15:16.055041', 'step': 2552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.085622', 'step': 2552, 'epoch': 1} {'type': 'loss', 'content': 0.21564728021621704, 'timestamp': '2025-09-30 22:15:16.088072', 'step': 2553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:16.118663', 'step': 2553, 'epoch': 1} {'type': 'loss', 'content': 0.16776922345161438, 'timestamp': '2025-09-30 22:15:16.125268', 'step': 2554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:16.159445', 'step': 2554, 'epoch': 1} {'type': 'loss', 'content': 0.20092487335205078, 'timestamp': '2025-09-30 22:15:16.165717', 'step': 2555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:16.200094', 'step': 2555, 'epoch': 1} {'type': 'loss', 'content': 0.14718997478485107, 'timestamp': '2025-09-30 22:15:16.227351', 'step': 2556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.260155', 'step': 2556, 'epoch': 1} {'type': 'loss', 'content': 0.13506968319416046, 'timestamp': '2025-09-30 22:15:16.267117', 'step': 2557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:16.297088', 'step': 2557, 'epoch': 1} {'type': 'loss', 'content': 0.13712993264198303, 'timestamp': '2025-09-30 22:15:16.300106', 'step': 2558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:16.334701', 'step': 2558, 'epoch': 1} {'type': 'loss', 'content': 0.09912290424108505, 'timestamp': '2025-09-30 22:15:16.340537', 'step': 2559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:16.377264', 'step': 2559, 'epoch': 1} {'type': 'loss', 'content': 0.2214200645685196, 'timestamp': '2025-09-30 22:15:16.401148', 'step': 2560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.432338', 'step': 2560, 'epoch': 1} {'type': 'loss', 'content': 0.1237788274884224, 'timestamp': '2025-09-30 22:15:16.438403', 'step': 2561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:16.469394', 'step': 2561, 'epoch': 1} {'type': 'loss', 'content': 0.06481745094060898, 'timestamp': '2025-09-30 22:15:16.477400', 'step': 2562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:16.511323', 'step': 2562, 'epoch': 1} {'type': 'loss', 'content': 0.09048906713724136, 'timestamp': '2025-09-30 22:15:16.517536', 'step': 2563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:16.553240', 'step': 2563, 'epoch': 1} {'type': 'loss', 'content': 0.08184308558702469, 'timestamp': '2025-09-30 22:15:16.580575', 'step': 2564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.610509', 'step': 2564, 'epoch': 1} {'type': 'loss', 'content': 0.16256730258464813, 'timestamp': '2025-09-30 22:15:16.613325', 'step': 2565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:16.647099', 'step': 2565, 'epoch': 1} {'type': 'loss', 'content': 0.14782263338565826, 'timestamp': '2025-09-30 22:15:16.651016', 'step': 2566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:16.682981', 'step': 2566, 'epoch': 1} {'type': 'loss', 'content': 0.14423643052577972, 'timestamp': '2025-09-30 22:15:16.685415', 'step': 2567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.716508', 'step': 2567, 'epoch': 1} {'type': 'loss', 'content': 0.14683333039283752, 'timestamp': '2025-09-30 22:15:16.742082', 'step': 2568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.781615', 'step': 2568, 'epoch': 1} {'type': 'loss', 'content': 0.10947800427675247, 'timestamp': '2025-09-30 22:15:16.787744', 'step': 2569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:16.819583', 'step': 2569, 'epoch': 1} {'type': 'loss', 'content': 0.16882440447807312, 'timestamp': '2025-09-30 22:15:16.829059', 'step': 2570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:16.865080', 'step': 2570, 'epoch': 1} {'type': 'loss', 'content': 0.2610792815685272, 'timestamp': '2025-09-30 22:15:16.869424', 'step': 2571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:16.901690', 'step': 2571, 'epoch': 1} {'type': 'loss', 'content': 0.14350269734859467, 'timestamp': '2025-09-30 22:15:16.930074', 'step': 2572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:16.963341', 'step': 2572, 'epoch': 1} {'type': 'loss', 'content': 0.09903985261917114, 'timestamp': '2025-09-30 22:15:16.965579', 'step': 2573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:16.996270', 'step': 2573, 'epoch': 1} {'type': 'loss', 'content': 0.1329057812690735, 'timestamp': '2025-09-30 22:15:17.002398', 'step': 2574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:17.033888', 'step': 2574, 'epoch': 1} {'type': 'loss', 'content': 0.15654148161411285, 'timestamp': '2025-09-30 22:15:17.038186', 'step': 2575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.071560', 'step': 2575, 'epoch': 1} {'type': 'loss', 'content': 0.08386071771383286, 'timestamp': '2025-09-30 22:15:17.097465', 'step': 2576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.127913', 'step': 2576, 'epoch': 1} {'type': 'loss', 'content': 0.11044757813215256, 'timestamp': '2025-09-30 22:15:17.130879', 'step': 2577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:17.167161', 'step': 2577, 'epoch': 1} {'type': 'loss', 'content': 0.2544488310813904, 'timestamp': '2025-09-30 22:15:17.170242', 'step': 2578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:17.204788', 'step': 2578, 'epoch': 1} {'type': 'loss', 'content': 0.22145000100135803, 'timestamp': '2025-09-30 22:15:17.207226', 'step': 2579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:17.237627', 'step': 2579, 'epoch': 1} {'type': 'loss', 'content': 0.0823836624622345, 'timestamp': '2025-09-30 22:15:17.262528', 'step': 2580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:17.294443', 'step': 2580, 'epoch': 1} {'type': 'loss', 'content': 0.18566229939460754, 'timestamp': '2025-09-30 22:15:17.297536', 'step': 2581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:17.336434', 'step': 2581, 'epoch': 1} {'type': 'loss', 'content': 0.18812769651412964, 'timestamp': '2025-09-30 22:15:17.347738', 'step': 2582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:17.380268', 'step': 2582, 'epoch': 1} {'type': 'loss', 'content': 0.1573707014322281, 'timestamp': '2025-09-30 22:15:17.389228', 'step': 2583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.428103', 'step': 2583, 'epoch': 1} {'type': 'loss', 'content': 0.18350790441036224, 'timestamp': '2025-09-30 22:15:17.457674', 'step': 2584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:17.488271', 'step': 2584, 'epoch': 1} {'type': 'loss', 'content': 0.1581079512834549, 'timestamp': '2025-09-30 22:15:17.490750', 'step': 2585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:17.523447', 'step': 2585, 'epoch': 1} {'type': 'loss', 'content': 0.14165200293064117, 'timestamp': '2025-09-30 22:15:17.526350', 'step': 2586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.557985', 'step': 2586, 'epoch': 1} {'type': 'loss', 'content': 0.164412260055542, 'timestamp': '2025-09-30 22:15:17.563121', 'step': 2587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.599986', 'step': 2587, 'epoch': 1} {'type': 'loss', 'content': 0.17947368323802948, 'timestamp': '2025-09-30 22:15:17.627371', 'step': 2588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.658783', 'step': 2588, 'epoch': 1} {'type': 'loss', 'content': 0.15610316395759583, 'timestamp': '2025-09-30 22:15:17.665573', 'step': 2589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:17.696291', 'step': 2589, 'epoch': 1} {'type': 'loss', 'content': 0.12638166546821594, 'timestamp': '2025-09-30 22:15:17.699249', 'step': 2590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:17.731840', 'step': 2590, 'epoch': 1} {'type': 'loss', 'content': 0.1895289123058319, 'timestamp': '2025-09-30 22:15:17.734387', 'step': 2591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.765771', 'step': 2591, 'epoch': 1} {'type': 'loss', 'content': 0.14785990118980408, 'timestamp': '2025-09-30 22:15:17.792625', 'step': 2592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:17.825281', 'step': 2592, 'epoch': 1} {'type': 'loss', 'content': 0.17172768712043762, 'timestamp': '2025-09-30 22:15:17.830537', 'step': 2593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.863146', 'step': 2593, 'epoch': 1} {'type': 'loss', 'content': 0.16450050473213196, 'timestamp': '2025-09-30 22:15:17.865428', 'step': 2594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.897489', 'step': 2594, 'epoch': 1} {'type': 'loss', 'content': 0.06417185813188553, 'timestamp': '2025-09-30 22:15:17.899988', 'step': 2595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.931806', 'step': 2595, 'epoch': 1} {'type': 'loss', 'content': 0.29846492409706116, 'timestamp': '2025-09-30 22:15:17.955756', 'step': 2596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:17.993158', 'step': 2596, 'epoch': 1} {'type': 'loss', 'content': 0.08377067744731903, 'timestamp': '2025-09-30 22:15:17.998055', 'step': 2597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:18.029929', 'step': 2597, 'epoch': 1} {'type': 'loss', 'content': 0.11213000118732452, 'timestamp': '2025-09-30 22:15:18.033988', 'step': 2598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:18.065060', 'step': 2598, 'epoch': 1} {'type': 'loss', 'content': 0.17413848638534546, 'timestamp': '2025-09-30 22:15:18.074241', 'step': 2599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.104462', 'step': 2599, 'epoch': 1} {'type': 'loss', 'content': 0.207132488489151, 'timestamp': '2025-09-30 22:15:18.128855', 'step': 2600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:18.160807', 'step': 2600, 'epoch': 1} {'type': 'loss', 'content': 0.12751562893390656, 'timestamp': '2025-09-30 22:15:18.169789', 'step': 2601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.205461', 'step': 2601, 'epoch': 1} {'type': 'loss', 'content': 0.19749194383621216, 'timestamp': '2025-09-30 22:15:18.207908', 'step': 2602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:18.238565', 'step': 2602, 'epoch': 1} {'type': 'loss', 'content': 0.12901869416236877, 'timestamp': '2025-09-30 22:15:18.241958', 'step': 2603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.273623', 'step': 2603, 'epoch': 1} {'type': 'loss', 'content': 0.13426573574543, 'timestamp': '2025-09-30 22:15:18.297811', 'step': 2604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:18.329783', 'step': 2604, 'epoch': 1} {'type': 'loss', 'content': 0.17153103649616241, 'timestamp': '2025-09-30 22:15:18.332981', 'step': 2605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:18.363933', 'step': 2605, 'epoch': 1} {'type': 'loss', 'content': 0.24461376667022705, 'timestamp': '2025-09-30 22:15:18.367322', 'step': 2606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:18.401674', 'step': 2606, 'epoch': 1} {'type': 'loss', 'content': 0.20570556819438934, 'timestamp': '2025-09-30 22:15:18.405779', 'step': 2607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:18.436996', 'step': 2607, 'epoch': 1} {'type': 'loss', 'content': 0.1005357950925827, 'timestamp': '2025-09-30 22:15:18.461053', 'step': 2608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.496150', 'step': 2608, 'epoch': 1} {'type': 'loss', 'content': 0.20324449241161346, 'timestamp': '2025-09-30 22:15:18.500433', 'step': 2609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:18.530522', 'step': 2609, 'epoch': 1} {'type': 'loss', 'content': 0.18235628306865692, 'timestamp': '2025-09-30 22:15:18.537527', 'step': 2610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.573804', 'step': 2610, 'epoch': 1} {'type': 'loss', 'content': 0.14415329694747925, 'timestamp': '2025-09-30 22:15:18.576150', 'step': 2611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.611309', 'step': 2611, 'epoch': 1} {'type': 'loss', 'content': 0.13804671168327332, 'timestamp': '2025-09-30 22:15:18.635176', 'step': 2612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:18.667421', 'step': 2612, 'epoch': 1} {'type': 'loss', 'content': 0.18393966555595398, 'timestamp': '2025-09-30 22:15:18.671773', 'step': 2613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.702357', 'step': 2613, 'epoch': 1} {'type': 'loss', 'content': 0.11498837172985077, 'timestamp': '2025-09-30 22:15:18.705044', 'step': 2614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:18.735623', 'step': 2614, 'epoch': 1} {'type': 'loss', 'content': 0.13290657103061676, 'timestamp': '2025-09-30 22:15:18.742979', 'step': 2615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:18.775638', 'step': 2615, 'epoch': 1} {'type': 'loss', 'content': 0.29096537828445435, 'timestamp': '2025-09-30 22:15:18.801651', 'step': 2616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:18.834144', 'step': 2616, 'epoch': 1} {'type': 'loss', 'content': 0.30814915895462036, 'timestamp': '2025-09-30 22:15:18.839676', 'step': 2617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:18.884952', 'step': 2617, 'epoch': 1} {'type': 'loss', 'content': 0.14455559849739075, 'timestamp': '2025-09-30 22:15:18.890483', 'step': 2618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:18.924011', 'step': 2618, 'epoch': 1} {'type': 'loss', 'content': 0.20893210172653198, 'timestamp': '2025-09-30 22:15:18.929112', 'step': 2619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:18.960517', 'step': 2619, 'epoch': 1} {'type': 'loss', 'content': 0.18989048898220062, 'timestamp': '2025-09-30 22:15:18.984350', 'step': 2620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.014958', 'step': 2620, 'epoch': 1} {'type': 'loss', 'content': 0.12104673683643341, 'timestamp': '2025-09-30 22:15:19.020416', 'step': 2621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.053927', 'step': 2621, 'epoch': 1} {'type': 'loss', 'content': 0.10589296370744705, 'timestamp': '2025-09-30 22:15:19.057233', 'step': 2622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.087543', 'step': 2622, 'epoch': 1} {'type': 'loss', 'content': 0.1448696404695511, 'timestamp': '2025-09-30 22:15:19.090226', 'step': 2623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.122950', 'step': 2623, 'epoch': 1} {'type': 'loss', 'content': 0.23230017721652985, 'timestamp': '2025-09-30 22:15:19.147297', 'step': 2624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.177155', 'step': 2624, 'epoch': 1} {'type': 'loss', 'content': 0.1284952163696289, 'timestamp': '2025-09-30 22:15:19.180691', 'step': 2625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:19.212870', 'step': 2625, 'epoch': 1} {'type': 'loss', 'content': 0.11393342912197113, 'timestamp': '2025-09-30 22:15:19.217295', 'step': 2626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.248215', 'step': 2626, 'epoch': 1} {'type': 'loss', 'content': 0.1684904843568802, 'timestamp': '2025-09-30 22:15:19.250721', 'step': 2627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:19.282844', 'step': 2627, 'epoch': 1} {'type': 'loss', 'content': 0.16330987215042114, 'timestamp': '2025-09-30 22:15:19.311528', 'step': 2628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:19.344411', 'step': 2628, 'epoch': 1} {'type': 'loss', 'content': 0.19048234820365906, 'timestamp': '2025-09-30 22:15:19.349596', 'step': 2629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.382267', 'step': 2629, 'epoch': 1} {'type': 'loss', 'content': 0.08797554671764374, 'timestamp': '2025-09-30 22:15:19.384823', 'step': 2630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.415635', 'step': 2630, 'epoch': 1} {'type': 'loss', 'content': 0.2159530520439148, 'timestamp': '2025-09-30 22:15:19.422230', 'step': 2631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.454586', 'step': 2631, 'epoch': 1} {'type': 'loss', 'content': 0.13830380141735077, 'timestamp': '2025-09-30 22:15:19.478306', 'step': 2632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:19.511598', 'step': 2632, 'epoch': 1} {'type': 'loss', 'content': 0.25184521079063416, 'timestamp': '2025-09-30 22:15:19.525655', 'step': 2633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.561779', 'step': 2633, 'epoch': 1} {'type': 'loss', 'content': 0.1669687032699585, 'timestamp': '2025-09-30 22:15:19.566173', 'step': 2634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:19.606635', 'step': 2634, 'epoch': 1} {'type': 'loss', 'content': 0.16778214275836945, 'timestamp': '2025-09-30 22:15:19.610854', 'step': 2635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:19.642078', 'step': 2635, 'epoch': 1} {'type': 'loss', 'content': 0.19174841046333313, 'timestamp': '2025-09-30 22:15:19.667625', 'step': 2636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.706817', 'step': 2636, 'epoch': 1} {'type': 'loss', 'content': 0.12165030837059021, 'timestamp': '2025-09-30 22:15:19.710125', 'step': 2637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.743038', 'step': 2637, 'epoch': 1} {'type': 'loss', 'content': 0.1569833606481552, 'timestamp': '2025-09-30 22:15:19.746780', 'step': 2638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:19.783985', 'step': 2638, 'epoch': 1} {'type': 'loss', 'content': 0.14323143661022186, 'timestamp': '2025-09-30 22:15:19.786771', 'step': 2639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:19.822621', 'step': 2639, 'epoch': 1} {'type': 'loss', 'content': 0.12884925305843353, 'timestamp': '2025-09-30 22:15:19.849543', 'step': 2640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.881774', 'step': 2640, 'epoch': 1} {'type': 'loss', 'content': 0.24552179872989655, 'timestamp': '2025-09-30 22:15:19.884340', 'step': 2641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:19.914569', 'step': 2641, 'epoch': 1} {'type': 'loss', 'content': 0.19098003208637238, 'timestamp': '2025-09-30 22:15:19.917908', 'step': 2642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:19.953424', 'step': 2642, 'epoch': 1} {'type': 'loss', 'content': 0.14106342196464539, 'timestamp': '2025-09-30 22:15:19.956602', 'step': 2643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:19.988618', 'step': 2643, 'epoch': 1} {'type': 'loss', 'content': 0.08529146015644073, 'timestamp': '2025-09-30 22:15:20.013389', 'step': 2644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:20.044266', 'step': 2644, 'epoch': 1} {'type': 'loss', 'content': 0.12493662536144257, 'timestamp': '2025-09-30 22:15:20.046962', 'step': 2645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:20.077598', 'step': 2645, 'epoch': 1} {'type': 'loss', 'content': 0.1372973769903183, 'timestamp': '2025-09-30 22:15:20.080614', 'step': 2646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:20.112553', 'step': 2646, 'epoch': 1} {'type': 'loss', 'content': 0.12392084300518036, 'timestamp': '2025-09-30 22:15:20.114837', 'step': 2647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:20.151902', 'step': 2647, 'epoch': 1} {'type': 'loss', 'content': 0.14303706586360931, 'timestamp': '2025-09-30 22:15:20.175486', 'step': 2648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.207131', 'step': 2648, 'epoch': 1} {'type': 'loss', 'content': 0.12037685513496399, 'timestamp': '2025-09-30 22:15:20.210348', 'step': 2649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.245128', 'step': 2649, 'epoch': 1} {'type': 'loss', 'content': 0.22070078551769257, 'timestamp': '2025-09-30 22:15:20.251565', 'step': 2650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.285318', 'step': 2650, 'epoch': 1} {'type': 'loss', 'content': 0.25903764367103577, 'timestamp': '2025-09-30 22:15:20.289015', 'step': 2651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:20.320151', 'step': 2651, 'epoch': 1} {'type': 'loss', 'content': 0.21998818218708038, 'timestamp': '2025-09-30 22:15:20.344106', 'step': 2652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:20.374746', 'step': 2652, 'epoch': 1} {'type': 'loss', 'content': 0.13211406767368317, 'timestamp': '2025-09-30 22:15:20.378759', 'step': 2653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.409815', 'step': 2653, 'epoch': 1} {'type': 'loss', 'content': 0.07905744016170502, 'timestamp': '2025-09-30 22:15:20.413360', 'step': 2654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:20.444700', 'step': 2654, 'epoch': 1} {'type': 'loss', 'content': 0.2577662765979767, 'timestamp': '2025-09-30 22:15:20.447052', 'step': 2655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.477211', 'step': 2655, 'epoch': 1} {'type': 'loss', 'content': 0.1398165076971054, 'timestamp': '2025-09-30 22:15:20.503815', 'step': 2656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:20.535339', 'step': 2656, 'epoch': 1} {'type': 'loss', 'content': 0.12262260168790817, 'timestamp': '2025-09-30 22:15:20.537811', 'step': 2657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:20.568019', 'step': 2657, 'epoch': 1} {'type': 'loss', 'content': 0.14374718070030212, 'timestamp': '2025-09-30 22:15:20.570378', 'step': 2658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.600376', 'step': 2658, 'epoch': 1} {'type': 'loss', 'content': 0.14093849062919617, 'timestamp': '2025-09-30 22:15:20.603317', 'step': 2659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:20.636788', 'step': 2659, 'epoch': 1} {'type': 'loss', 'content': 0.19801545143127441, 'timestamp': '2025-09-30 22:15:20.662861', 'step': 2660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:20.693840', 'step': 2660, 'epoch': 1} {'type': 'loss', 'content': 0.28799933195114136, 'timestamp': '2025-09-30 22:15:20.703869', 'step': 2661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.740715', 'step': 2661, 'epoch': 1} {'type': 'loss', 'content': 0.17457439005374908, 'timestamp': '2025-09-30 22:15:20.746344', 'step': 2662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:20.779407', 'step': 2662, 'epoch': 1} {'type': 'loss', 'content': 0.1418352872133255, 'timestamp': '2025-09-30 22:15:20.781907', 'step': 2663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.812186', 'step': 2663, 'epoch': 1} {'type': 'loss', 'content': 0.16451551020145416, 'timestamp': '2025-09-30 22:15:20.837635', 'step': 2664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.868456', 'step': 2664, 'epoch': 1} {'type': 'loss', 'content': 0.18825821578502655, 'timestamp': '2025-09-30 22:15:20.872480', 'step': 2665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:20.902698', 'step': 2665, 'epoch': 1} {'type': 'loss', 'content': 0.11225425451993942, 'timestamp': '2025-09-30 22:15:20.907387', 'step': 2666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:20.938097', 'step': 2666, 'epoch': 1} {'type': 'loss', 'content': 0.18891066312789917, 'timestamp': '2025-09-30 22:15:20.940774', 'step': 2667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:20.973056', 'step': 2667, 'epoch': 1} {'type': 'loss', 'content': 0.16428431868553162, 'timestamp': '2025-09-30 22:15:20.997901', 'step': 2668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:21.027809', 'step': 2668, 'epoch': 1} {'type': 'loss', 'content': 0.2126665860414505, 'timestamp': '2025-09-30 22:15:21.030023', 'step': 2669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:21.059960', 'step': 2669, 'epoch': 1} {'type': 'loss', 'content': 0.10888869315385818, 'timestamp': '2025-09-30 22:15:21.062669', 'step': 2670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.094394', 'step': 2670, 'epoch': 1} {'type': 'loss', 'content': 0.16062527894973755, 'timestamp': '2025-09-30 22:15:21.103339', 'step': 2671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.138906', 'step': 2671, 'epoch': 1} {'type': 'loss', 'content': 0.117457814514637, 'timestamp': '2025-09-30 22:15:21.166021', 'step': 2672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.200391', 'step': 2672, 'epoch': 1} {'type': 'loss', 'content': 0.13526715338230133, 'timestamp': '2025-09-30 22:15:21.202792', 'step': 2673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.234477', 'step': 2673, 'epoch': 1} {'type': 'loss', 'content': 0.12942372262477875, 'timestamp': '2025-09-30 22:15:21.238203', 'step': 2674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.269739', 'step': 2674, 'epoch': 1} {'type': 'loss', 'content': 0.1898762285709381, 'timestamp': '2025-09-30 22:15:21.272106', 'step': 2675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:15:21.302567', 'step': 2675, 'epoch': 1} {'type': 'loss', 'content': 0.1204276755452156, 'timestamp': '2025-09-30 22:15:21.328227', 'step': 2676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:21.360520', 'step': 2676, 'epoch': 1} {'type': 'loss', 'content': 0.14353224635124207, 'timestamp': '2025-09-30 22:15:21.362924', 'step': 2677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.393103', 'step': 2677, 'epoch': 1} {'type': 'loss', 'content': 0.12400180846452713, 'timestamp': '2025-09-30 22:15:21.395465', 'step': 2678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:21.426460', 'step': 2678, 'epoch': 1} {'type': 'loss', 'content': 0.16711752116680145, 'timestamp': '2025-09-30 22:15:21.429358', 'step': 2679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.463475', 'step': 2679, 'epoch': 1} {'type': 'loss', 'content': 0.1928347647190094, 'timestamp': '2025-09-30 22:15:21.490185', 'step': 2680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.521368', 'step': 2680, 'epoch': 1} {'type': 'loss', 'content': 0.09051686525344849, 'timestamp': '2025-09-30 22:15:21.524325', 'step': 2681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:21.558003', 'step': 2681, 'epoch': 1} {'type': 'loss', 'content': 0.17296171188354492, 'timestamp': '2025-09-30 22:15:21.564986', 'step': 2682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.599378', 'step': 2682, 'epoch': 1} {'type': 'loss', 'content': 0.15993526577949524, 'timestamp': '2025-09-30 22:15:21.605315', 'step': 2683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:21.636629', 'step': 2683, 'epoch': 1} {'type': 'loss', 'content': 0.17383170127868652, 'timestamp': '2025-09-30 22:15:21.669549', 'step': 2684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.706326', 'step': 2684, 'epoch': 1} {'type': 'loss', 'content': 0.11814635246992111, 'timestamp': '2025-09-30 22:15:21.713028', 'step': 2685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.749099', 'step': 2685, 'epoch': 1} {'type': 'loss', 'content': 0.18366459012031555, 'timestamp': '2025-09-30 22:15:21.751796', 'step': 2686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.784105', 'step': 2686, 'epoch': 1} {'type': 'loss', 'content': 0.15334077179431915, 'timestamp': '2025-09-30 22:15:21.786630', 'step': 2687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.818526', 'step': 2687, 'epoch': 1} {'type': 'loss', 'content': 0.13181902468204498, 'timestamp': '2025-09-30 22:15:21.843072', 'step': 2688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.874885', 'step': 2688, 'epoch': 1} {'type': 'loss', 'content': 0.12190696597099304, 'timestamp': '2025-09-30 22:15:21.878000', 'step': 2689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:21.909067', 'step': 2689, 'epoch': 1} {'type': 'loss', 'content': 0.17667530477046967, 'timestamp': '2025-09-30 22:15:21.912800', 'step': 2690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.944796', 'step': 2690, 'epoch': 1} {'type': 'loss', 'content': 0.16493406891822815, 'timestamp': '2025-09-30 22:15:21.946824', 'step': 2691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:21.977328', 'step': 2691, 'epoch': 1} {'type': 'loss', 'content': 0.22555041313171387, 'timestamp': '2025-09-30 22:15:22.001078', 'step': 2692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.033005', 'step': 2692, 'epoch': 1} {'type': 'loss', 'content': 0.15467052161693573, 'timestamp': '2025-09-30 22:15:22.040767', 'step': 2693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:22.075449', 'step': 2693, 'epoch': 1} {'type': 'loss', 'content': 0.1652912050485611, 'timestamp': '2025-09-30 22:15:22.080052', 'step': 2694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.113251', 'step': 2694, 'epoch': 1} {'type': 'loss', 'content': 0.16139943897724152, 'timestamp': '2025-09-30 22:15:22.117344', 'step': 2695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:22.150275', 'step': 2695, 'epoch': 1} {'type': 'loss', 'content': 0.1410113424062729, 'timestamp': '2025-09-30 22:15:22.175476', 'step': 2696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:22.207452', 'step': 2696, 'epoch': 1} {'type': 'loss', 'content': 0.13216626644134521, 'timestamp': '2025-09-30 22:15:22.211023', 'step': 2697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:22.240880', 'step': 2697, 'epoch': 1} {'type': 'loss', 'content': 0.16333509981632233, 'timestamp': '2025-09-30 22:15:22.244331', 'step': 2698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:22.275418', 'step': 2698, 'epoch': 1} {'type': 'loss', 'content': 0.1908002346754074, 'timestamp': '2025-09-30 22:15:22.279388', 'step': 2699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.310139', 'step': 2699, 'epoch': 1} {'type': 'loss', 'content': 0.17989404499530792, 'timestamp': '2025-09-30 22:15:22.335241', 'step': 2700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:22.365328', 'step': 2700, 'epoch': 1} {'type': 'loss', 'content': 0.10828377306461334, 'timestamp': '2025-09-30 22:15:22.368861', 'step': 2701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:22.403499', 'step': 2701, 'epoch': 1} {'type': 'loss', 'content': 0.17170433700084686, 'timestamp': '2025-09-30 22:15:22.406683', 'step': 2702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.438875', 'step': 2702, 'epoch': 1} {'type': 'loss', 'content': 0.268075555562973, 'timestamp': '2025-09-30 22:15:22.441495', 'step': 2703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:22.485782', 'step': 2703, 'epoch': 1} {'type': 'loss', 'content': 0.1772077977657318, 'timestamp': '2025-09-30 22:15:22.509731', 'step': 2704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:22.540848', 'step': 2704, 'epoch': 1} {'type': 'loss', 'content': 0.20388968288898468, 'timestamp': '2025-09-30 22:15:22.547902', 'step': 2705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.592773', 'step': 2705, 'epoch': 1} {'type': 'loss', 'content': 0.15124478936195374, 'timestamp': '2025-09-30 22:15:22.594959', 'step': 2706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.625853', 'step': 2706, 'epoch': 1} {'type': 'loss', 'content': 0.2649190425872803, 'timestamp': '2025-09-30 22:15:22.632669', 'step': 2707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:22.666227', 'step': 2707, 'epoch': 1} {'type': 'loss', 'content': 0.20216389000415802, 'timestamp': '2025-09-30 22:15:22.695002', 'step': 2708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:22.728706', 'step': 2708, 'epoch': 1} {'type': 'loss', 'content': 0.22313974797725677, 'timestamp': '2025-09-30 22:15:22.733355', 'step': 2709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:22.765343', 'step': 2709, 'epoch': 1} {'type': 'loss', 'content': 0.2331104427576065, 'timestamp': '2025-09-30 22:15:22.776953', 'step': 2710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:22.812870', 'step': 2710, 'epoch': 1} {'type': 'loss', 'content': 0.17611254751682281, 'timestamp': '2025-09-30 22:15:22.818738', 'step': 2711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:22.851863', 'step': 2711, 'epoch': 1} {'type': 'loss', 'content': 0.21638263761997223, 'timestamp': '2025-09-30 22:15:22.882474', 'step': 2712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:22.913919', 'step': 2712, 'epoch': 1} {'type': 'loss', 'content': 0.2226937711238861, 'timestamp': '2025-09-30 22:15:22.915878', 'step': 2713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:22.948065', 'step': 2713, 'epoch': 1} {'type': 'loss', 'content': 0.12105342745780945, 'timestamp': '2025-09-30 22:15:22.951879', 'step': 2714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:22.982114', 'step': 2714, 'epoch': 1} {'type': 'loss', 'content': 0.1889893114566803, 'timestamp': '2025-09-30 22:15:22.987588', 'step': 2715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:23.018050', 'step': 2715, 'epoch': 1} {'type': 'loss', 'content': 0.12026210874319077, 'timestamp': '2025-09-30 22:15:23.042065', 'step': 2716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.073263', 'step': 2716, 'epoch': 1} {'type': 'loss', 'content': 0.13013912737369537, 'timestamp': '2025-09-30 22:15:23.076279', 'step': 2717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:23.107472', 'step': 2717, 'epoch': 1} {'type': 'loss', 'content': 0.19018083810806274, 'timestamp': '2025-09-30 22:15:23.113311', 'step': 2718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.143992', 'step': 2718, 'epoch': 1} {'type': 'loss', 'content': 0.13552574813365936, 'timestamp': '2025-09-30 22:15:23.146822', 'step': 2719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.182495', 'step': 2719, 'epoch': 1} {'type': 'loss', 'content': 0.29762059450149536, 'timestamp': '2025-09-30 22:15:23.208388', 'step': 2720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:23.241267', 'step': 2720, 'epoch': 1} {'type': 'loss', 'content': 0.23545491695404053, 'timestamp': '2025-09-30 22:15:23.244137', 'step': 2721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.274605', 'step': 2721, 'epoch': 1} {'type': 'loss', 'content': 0.1616298407316208, 'timestamp': '2025-09-30 22:15:23.276928', 'step': 2722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.308701', 'step': 2722, 'epoch': 1} {'type': 'loss', 'content': 0.1828562170267105, 'timestamp': '2025-09-30 22:15:23.311224', 'step': 2723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:23.344723', 'step': 2723, 'epoch': 1} {'type': 'loss', 'content': 0.17472419142723083, 'timestamp': '2025-09-30 22:15:23.369928', 'step': 2724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:23.402809', 'step': 2724, 'epoch': 1} {'type': 'loss', 'content': 0.2054942101240158, 'timestamp': '2025-09-30 22:15:23.406040', 'step': 2725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.436118', 'step': 2725, 'epoch': 1} {'type': 'loss', 'content': 0.1199483796954155, 'timestamp': '2025-09-30 22:15:23.438767', 'step': 2726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.469926', 'step': 2726, 'epoch': 1} {'type': 'loss', 'content': 0.1804499626159668, 'timestamp': '2025-09-30 22:15:23.472169', 'step': 2727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.502744', 'step': 2727, 'epoch': 1} {'type': 'loss', 'content': 0.23795127868652344, 'timestamp': '2025-09-30 22:15:23.528371', 'step': 2728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:23.563513', 'step': 2728, 'epoch': 1} {'type': 'loss', 'content': 0.14427363872528076, 'timestamp': '2025-09-30 22:15:23.571458', 'step': 2729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.607592', 'step': 2729, 'epoch': 1} {'type': 'loss', 'content': 0.13498806953430176, 'timestamp': '2025-09-30 22:15:23.610585', 'step': 2730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.641587', 'step': 2730, 'epoch': 1} {'type': 'loss', 'content': 0.24491377174854279, 'timestamp': '2025-09-30 22:15:23.644271', 'step': 2731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.679871', 'step': 2731, 'epoch': 1} {'type': 'loss', 'content': 0.1488373875617981, 'timestamp': '2025-09-30 22:15:23.704346', 'step': 2732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:23.747164', 'step': 2732, 'epoch': 1} {'type': 'loss', 'content': 0.13308291137218475, 'timestamp': '2025-09-30 22:15:23.749497', 'step': 2733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.783613', 'step': 2733, 'epoch': 1} {'type': 'loss', 'content': 0.19243183732032776, 'timestamp': '2025-09-30 22:15:23.785875', 'step': 2734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:23.822982', 'step': 2734, 'epoch': 1} {'type': 'loss', 'content': 0.19861668348312378, 'timestamp': '2025-09-30 22:15:23.828014', 'step': 2735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.861164', 'step': 2735, 'epoch': 1} {'type': 'loss', 'content': 0.18003574013710022, 'timestamp': '2025-09-30 22:15:23.884773', 'step': 2736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:23.918372', 'step': 2736, 'epoch': 1} {'type': 'loss', 'content': 0.1116245687007904, 'timestamp': '2025-09-30 22:15:23.920727', 'step': 2737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.952212', 'step': 2737, 'epoch': 1} {'type': 'loss', 'content': 0.19054287672042847, 'timestamp': '2025-09-30 22:15:23.955845', 'step': 2738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:23.991283', 'step': 2738, 'epoch': 1} {'type': 'loss', 'content': 0.1272663176059723, 'timestamp': '2025-09-30 22:15:23.994333', 'step': 2739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:24.028505', 'step': 2739, 'epoch': 1} {'type': 'loss', 'content': 0.17791038751602173, 'timestamp': '2025-09-30 22:15:24.053832', 'step': 2740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:24.084144', 'step': 2740, 'epoch': 1} {'type': 'loss', 'content': 0.16584168374538422, 'timestamp': '2025-09-30 22:15:24.086679', 'step': 2741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:24.118165', 'step': 2741, 'epoch': 1} {'type': 'loss', 'content': 0.1244010403752327, 'timestamp': '2025-09-30 22:15:24.120510', 'step': 2742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:24.150410', 'step': 2742, 'epoch': 1} {'type': 'loss', 'content': 0.13533997535705566, 'timestamp': '2025-09-30 22:15:24.163227', 'step': 2743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:24.195028', 'step': 2743, 'epoch': 1} {'type': 'loss', 'content': 0.15512681007385254, 'timestamp': '2025-09-30 22:15:24.219943', 'step': 2744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:24.251790', 'step': 2744, 'epoch': 1} {'type': 'loss', 'content': 0.22110538184642792, 'timestamp': '2025-09-30 22:15:24.254960', 'step': 2745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:24.284780', 'step': 2745, 'epoch': 1} {'type': 'loss', 'content': 0.14564040303230286, 'timestamp': '2025-09-30 22:15:24.287471', 'step': 2746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:24.319326', 'step': 2746, 'epoch': 1} {'type': 'loss', 'content': 0.1431656777858734, 'timestamp': '2025-09-30 22:15:24.324781', 'step': 2747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:24.356279', 'step': 2747, 'epoch': 1} {'type': 'loss', 'content': 0.15101787447929382, 'timestamp': '2025-09-30 22:15:24.379961', 'step': 2748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:15:24.410690', 'step': 2748, 'epoch': 1} {'type': 'loss', 'content': 0.188302144408226, 'timestamp': '2025-09-30 22:15:24.417548', 'step': 2749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:24.454165', 'step': 2749, 'epoch': 1} {'type': 'loss', 'content': 0.1241370216012001, 'timestamp': '2025-09-30 22:15:24.460603', 'step': 2750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:24.491693', 'step': 2750, 'epoch': 1} {'type': 'loss', 'content': 0.2321355640888214, 'timestamp': '2025-09-30 22:15:24.494151', 'step': 2751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:24.526188', 'step': 2751, 'epoch': 1} {'type': 'loss', 'content': 0.12357111275196075, 'timestamp': '2025-09-30 22:15:24.550201', 'step': 2752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:24.580906', 'step': 2752, 'epoch': 1} {'type': 'loss', 'content': 0.21191337704658508, 'timestamp': '2025-09-30 22:15:24.584042', 'step': 2753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:24.614225', 'step': 2753, 'epoch': 1} {'type': 'loss', 'content': 0.24350959062576294, 'timestamp': '2025-09-30 22:15:24.616564', 'step': 2754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:24.648761', 'step': 2754, 'epoch': 1} {'type': 'loss', 'content': 0.22118616104125977, 'timestamp': '2025-09-30 22:15:24.651620', 'step': 2755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:24.684924', 'step': 2755, 'epoch': 1} {'type': 'loss', 'content': 0.13191628456115723, 'timestamp': '2025-09-30 22:15:24.709009', 'step': 2756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:24.739212', 'step': 2756, 'epoch': 1} {'type': 'loss', 'content': 0.17536897957324982, 'timestamp': '2025-09-30 22:15:24.741495', 'step': 2757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:24.771906', 'step': 2757, 'epoch': 1} {'type': 'loss', 'content': 0.19717703759670258, 'timestamp': '2025-09-30 22:15:24.774813', 'step': 2758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:24.805644', 'step': 2758, 'epoch': 1} {'type': 'loss', 'content': 0.16426929831504822, 'timestamp': '2025-09-30 22:15:24.809156', 'step': 2759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:24.840161', 'step': 2759, 'epoch': 1} {'type': 'loss', 'content': 0.0941096618771553, 'timestamp': '2025-09-30 22:15:24.864112', 'step': 2760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:24.894582', 'step': 2760, 'epoch': 1} {'type': 'loss', 'content': 0.15764395892620087, 'timestamp': '2025-09-30 22:15:24.897722', 'step': 2761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:24.931545', 'step': 2761, 'epoch': 1} {'type': 'loss', 'content': 0.13864044845104218, 'timestamp': '2025-09-30 22:15:24.933950', 'step': 2762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:24.970503', 'step': 2762, 'epoch': 1} {'type': 'loss', 'content': 0.1600678563117981, 'timestamp': '2025-09-30 22:15:24.975122', 'step': 2763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:25.009864', 'step': 2763, 'epoch': 1} {'type': 'loss', 'content': 0.1983751803636551, 'timestamp': '2025-09-30 22:15:25.034039', 'step': 2764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.064557', 'step': 2764, 'epoch': 1} {'type': 'loss', 'content': 0.1653209626674652, 'timestamp': '2025-09-30 22:15:25.067838', 'step': 2765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:25.101528', 'step': 2765, 'epoch': 1} {'type': 'loss', 'content': 0.19997571408748627, 'timestamp': '2025-09-30 22:15:25.104408', 'step': 2766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:25.135275', 'step': 2766, 'epoch': 1} {'type': 'loss', 'content': 0.1760759949684143, 'timestamp': '2025-09-30 22:15:25.138169', 'step': 2767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.168102', 'step': 2767, 'epoch': 1} {'type': 'loss', 'content': 0.16830892860889435, 'timestamp': '2025-09-30 22:15:25.191961', 'step': 2768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.221693', 'step': 2768, 'epoch': 1} {'type': 'loss', 'content': 0.14950625598430634, 'timestamp': '2025-09-30 22:15:25.224320', 'step': 2769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.253926', 'step': 2769, 'epoch': 1} {'type': 'loss', 'content': 0.21515542268753052, 'timestamp': '2025-09-30 22:15:25.256859', 'step': 2770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.287540', 'step': 2770, 'epoch': 1} {'type': 'loss', 'content': 0.1036849319934845, 'timestamp': '2025-09-30 22:15:25.294390', 'step': 2771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.325907', 'step': 2771, 'epoch': 1} {'type': 'loss', 'content': 0.23093213140964508, 'timestamp': '2025-09-30 22:15:25.349977', 'step': 2772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:25.382159', 'step': 2772, 'epoch': 1} {'type': 'loss', 'content': 0.07801637798547745, 'timestamp': '2025-09-30 22:15:25.385689', 'step': 2773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.421858', 'step': 2773, 'epoch': 1} {'type': 'loss', 'content': 0.22081950306892395, 'timestamp': '2025-09-30 22:15:25.428473', 'step': 2774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:25.459774', 'step': 2774, 'epoch': 1} {'type': 'loss', 'content': 0.16973096132278442, 'timestamp': '2025-09-30 22:15:25.463226', 'step': 2775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.494145', 'step': 2775, 'epoch': 1} {'type': 'loss', 'content': 0.18126827478408813, 'timestamp': '2025-09-30 22:15:25.519470', 'step': 2776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:25.550990', 'step': 2776, 'epoch': 1} {'type': 'loss', 'content': 0.15263338387012482, 'timestamp': '2025-09-30 22:15:25.555334', 'step': 2777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.592612', 'step': 2777, 'epoch': 1} {'type': 'loss', 'content': 0.13189353048801422, 'timestamp': '2025-09-30 22:15:25.594897', 'step': 2778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.628155', 'step': 2778, 'epoch': 1} {'type': 'loss', 'content': 0.10984543710947037, 'timestamp': '2025-09-30 22:15:25.630918', 'step': 2779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.661873', 'step': 2779, 'epoch': 1} {'type': 'loss', 'content': 0.23736722767353058, 'timestamp': '2025-09-30 22:15:25.686706', 'step': 2780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.718258', 'step': 2780, 'epoch': 1} {'type': 'loss', 'content': 0.17787177860736847, 'timestamp': '2025-09-30 22:15:25.723391', 'step': 2781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.754854', 'step': 2781, 'epoch': 1} {'type': 'loss', 'content': 0.15763942897319794, 'timestamp': '2025-09-30 22:15:25.761356', 'step': 2782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.793100', 'step': 2782, 'epoch': 1} {'type': 'loss', 'content': 0.2031032145023346, 'timestamp': '2025-09-30 22:15:25.795368', 'step': 2783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:25.826977', 'step': 2783, 'epoch': 1} {'type': 'loss', 'content': 0.2159174382686615, 'timestamp': '2025-09-30 22:15:25.851367', 'step': 2784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.882535', 'step': 2784, 'epoch': 1} {'type': 'loss', 'content': 0.11724352091550827, 'timestamp': '2025-09-30 22:15:25.885812', 'step': 2785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.921661', 'step': 2785, 'epoch': 1} {'type': 'loss', 'content': 0.24463637173175812, 'timestamp': '2025-09-30 22:15:25.923936', 'step': 2786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.954507', 'step': 2786, 'epoch': 1} {'type': 'loss', 'content': 0.17066699266433716, 'timestamp': '2025-09-30 22:15:25.957531', 'step': 2787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:25.988736', 'step': 2787, 'epoch': 1} {'type': 'loss', 'content': 0.16854026913642883, 'timestamp': '2025-09-30 22:15:26.012551', 'step': 2788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:26.042829', 'step': 2788, 'epoch': 1} {'type': 'loss', 'content': 0.13720761239528656, 'timestamp': '2025-09-30 22:15:26.045740', 'step': 2789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:26.077358', 'step': 2789, 'epoch': 1} {'type': 'loss', 'content': 0.20273827016353607, 'timestamp': '2025-09-30 22:15:26.082136', 'step': 2790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:26.112773', 'step': 2790, 'epoch': 1} {'type': 'loss', 'content': 0.13199041783809662, 'timestamp': '2025-09-30 22:15:26.115223', 'step': 2791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:26.145988', 'step': 2791, 'epoch': 1} {'type': 'loss', 'content': 0.35337454080581665, 'timestamp': '2025-09-30 22:15:26.171212', 'step': 2792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:26.201844', 'step': 2792, 'epoch': 1} {'type': 'loss', 'content': 0.1485971361398697, 'timestamp': '2025-09-30 22:15:26.204047', 'step': 2793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:26.234478', 'step': 2793, 'epoch': 1} {'type': 'loss', 'content': 0.1719679832458496, 'timestamp': '2025-09-30 22:15:26.237042', 'step': 2794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:26.268017', 'step': 2794, 'epoch': 1} {'type': 'loss', 'content': 0.1598486453294754, 'timestamp': '2025-09-30 22:15:26.275769', 'step': 2795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:26.314404', 'step': 2795, 'epoch': 1} {'type': 'loss', 'content': 0.14390835165977478, 'timestamp': '2025-09-30 22:15:26.345508', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:15:34.365527', 'step': 2796, 'epoch': 1} {'type': 'pplx', 'content': 9680.255702372204, 'timestamp': '2025-09-30 22:15:34.369849', 'step': 2796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:34.399886', 'step': 2796, 'epoch': 1} {'type': 'loss', 'content': 0.2623070478439331, 'timestamp': '2025-09-30 22:15:34.406293', 'step': 2797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:34.442659', 'step': 2797, 'epoch': 1} {'type': 'loss', 'content': 0.19609569013118744, 'timestamp': '2025-09-30 22:15:34.447045', 'step': 2798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:34.495674', 'step': 2798, 'epoch': 1} {'type': 'loss', 'content': 0.10405464470386505, 'timestamp': '2025-09-30 22:15:34.500509', 'step': 2799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:34.538051', 'step': 2799, 'epoch': 1} {'type': 'loss', 'content': 0.16671428084373474, 'timestamp': '2025-09-30 22:15:34.565667', 'step': 2800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:34.597429', 'step': 2800, 'epoch': 1} {'type': 'loss', 'content': 0.11432775110006332, 'timestamp': '2025-09-30 22:15:34.602736', 'step': 2801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:34.637845', 'step': 2801, 'epoch': 1} {'type': 'loss', 'content': 0.14519274234771729, 'timestamp': '2025-09-30 22:15:34.642588', 'step': 2802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:34.679044', 'step': 2802, 'epoch': 1} {'type': 'loss', 'content': 0.10975959151983261, 'timestamp': '2025-09-30 22:15:34.684502', 'step': 2803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:34.719646', 'step': 2803, 'epoch': 1} {'type': 'loss', 'content': 0.1821567416191101, 'timestamp': '2025-09-30 22:15:34.743884', 'step': 2804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:34.785331', 'step': 2804, 'epoch': 1} {'type': 'loss', 'content': 0.17087149620056152, 'timestamp': '2025-09-30 22:15:34.791053', 'step': 2805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:34.826532', 'step': 2805, 'epoch': 1} {'type': 'loss', 'content': 0.12754489481449127, 'timestamp': '2025-09-30 22:15:34.831483', 'step': 2806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:34.883164', 'step': 2806, 'epoch': 1} {'type': 'loss', 'content': 0.25539398193359375, 'timestamp': '2025-09-30 22:15:34.885732', 'step': 2807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:34.918735', 'step': 2807, 'epoch': 1} {'type': 'loss', 'content': 0.20350022614002228, 'timestamp': '2025-09-30 22:15:34.942543', 'step': 2808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:34.974939', 'step': 2808, 'epoch': 1} {'type': 'loss', 'content': 0.37612539529800415, 'timestamp': '2025-09-30 22:15:34.980542', 'step': 2809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.013336', 'step': 2809, 'epoch': 1} {'type': 'loss', 'content': 0.1553942710161209, 'timestamp': '2025-09-30 22:15:35.018772', 'step': 2810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.058430', 'step': 2810, 'epoch': 1} {'type': 'loss', 'content': 0.11773571372032166, 'timestamp': '2025-09-30 22:15:35.063605', 'step': 2811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:35.102014', 'step': 2811, 'epoch': 1} {'type': 'loss', 'content': 0.11421972513198853, 'timestamp': '2025-09-30 22:15:35.126312', 'step': 2812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:35.162408', 'step': 2812, 'epoch': 1} {'type': 'loss', 'content': 0.16250945627689362, 'timestamp': '2025-09-30 22:15:35.164893', 'step': 2813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.206083', 'step': 2813, 'epoch': 1} {'type': 'loss', 'content': 0.15498937666416168, 'timestamp': '2025-09-30 22:15:35.210267', 'step': 2814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.247718', 'step': 2814, 'epoch': 1} {'type': 'loss', 'content': 0.19590844213962555, 'timestamp': '2025-09-30 22:15:35.251467', 'step': 2815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:35.286383', 'step': 2815, 'epoch': 1} {'type': 'loss', 'content': 0.18275311589241028, 'timestamp': '2025-09-30 22:15:35.315249', 'step': 2816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:35.348832', 'step': 2816, 'epoch': 1} {'type': 'loss', 'content': 0.13075490295886993, 'timestamp': '2025-09-30 22:15:35.351041', 'step': 2817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.386401', 'step': 2817, 'epoch': 1} {'type': 'loss', 'content': 0.14456810057163239, 'timestamp': '2025-09-30 22:15:35.388702', 'step': 2818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:35.423116', 'step': 2818, 'epoch': 1} {'type': 'loss', 'content': 0.2935934066772461, 'timestamp': '2025-09-30 22:15:35.427297', 'step': 2819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:35.459826', 'step': 2819, 'epoch': 1} {'type': 'loss', 'content': 0.20998696982860565, 'timestamp': '2025-09-30 22:15:35.486438', 'step': 2820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.518894', 'step': 2820, 'epoch': 1} {'type': 'loss', 'content': 0.17221370339393616, 'timestamp': '2025-09-30 22:15:35.529236', 'step': 2821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.562421', 'step': 2821, 'epoch': 1} {'type': 'loss', 'content': 0.24932754039764404, 'timestamp': '2025-09-30 22:15:35.564682', 'step': 2822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:35.597314', 'step': 2822, 'epoch': 1} {'type': 'loss', 'content': 0.17875175178050995, 'timestamp': '2025-09-30 22:15:35.599845', 'step': 2823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.630327', 'step': 2823, 'epoch': 1} {'type': 'loss', 'content': 0.09329801797866821, 'timestamp': '2025-09-30 22:15:35.657784', 'step': 2824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:35.688668', 'step': 2824, 'epoch': 1} {'type': 'loss', 'content': 0.13750797510147095, 'timestamp': '2025-09-30 22:15:35.692339', 'step': 2825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:35.724730', 'step': 2825, 'epoch': 1} {'type': 'loss', 'content': 0.17278717458248138, 'timestamp': '2025-09-30 22:15:35.727172', 'step': 2826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.758929', 'step': 2826, 'epoch': 1} {'type': 'loss', 'content': 0.169981449842453, 'timestamp': '2025-09-30 22:15:35.763876', 'step': 2827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:35.797922', 'step': 2827, 'epoch': 1} {'type': 'loss', 'content': 0.088066965341568, 'timestamp': '2025-09-30 22:15:35.827536', 'step': 2828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.858472', 'step': 2828, 'epoch': 1} {'type': 'loss', 'content': 0.12251223623752594, 'timestamp': '2025-09-30 22:15:35.861212', 'step': 2829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.891984', 'step': 2829, 'epoch': 1} {'type': 'loss', 'content': 0.27892595529556274, 'timestamp': '2025-09-30 22:15:35.896177', 'step': 2830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.930732', 'step': 2830, 'epoch': 1} {'type': 'loss', 'content': 0.1893150359392166, 'timestamp': '2025-09-30 22:15:35.933520', 'step': 2831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:35.964828', 'step': 2831, 'epoch': 1} {'type': 'loss', 'content': 0.15461041033267975, 'timestamp': '2025-09-30 22:15:35.989007', 'step': 2832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.019453', 'step': 2832, 'epoch': 1} {'type': 'loss', 'content': 0.1198556199669838, 'timestamp': '2025-09-30 22:15:36.023798', 'step': 2833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.062264', 'step': 2833, 'epoch': 1} {'type': 'loss', 'content': 0.10964608192443848, 'timestamp': '2025-09-30 22:15:36.067335', 'step': 2834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:36.098036', 'step': 2834, 'epoch': 1} {'type': 'loss', 'content': 0.11244652420282364, 'timestamp': '2025-09-30 22:15:36.104523', 'step': 2835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.138918', 'step': 2835, 'epoch': 1} {'type': 'loss', 'content': 0.1644873172044754, 'timestamp': '2025-09-30 22:15:36.163517', 'step': 2836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.196621', 'step': 2836, 'epoch': 1} {'type': 'loss', 'content': 0.20906873047351837, 'timestamp': '2025-09-30 22:15:36.199889', 'step': 2837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.230565', 'step': 2837, 'epoch': 1} {'type': 'loss', 'content': 0.20902161300182343, 'timestamp': '2025-09-30 22:15:36.233117', 'step': 2838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.268562', 'step': 2838, 'epoch': 1} {'type': 'loss', 'content': 0.13218633830547333, 'timestamp': '2025-09-30 22:15:36.280278', 'step': 2839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.317445', 'step': 2839, 'epoch': 1} {'type': 'loss', 'content': 0.1706569343805313, 'timestamp': '2025-09-30 22:15:36.342700', 'step': 2840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:36.376981', 'step': 2840, 'epoch': 1} {'type': 'loss', 'content': 0.32456910610198975, 'timestamp': '2025-09-30 22:15:36.380255', 'step': 2841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.410683', 'step': 2841, 'epoch': 1} {'type': 'loss', 'content': 0.1555728316307068, 'timestamp': '2025-09-30 22:15:36.413746', 'step': 2842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:36.445168', 'step': 2842, 'epoch': 1} {'type': 'loss', 'content': 0.1441039741039276, 'timestamp': '2025-09-30 22:15:36.447861', 'step': 2843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:36.478457', 'step': 2843, 'epoch': 1} {'type': 'loss', 'content': 0.1653471738100052, 'timestamp': '2025-09-30 22:15:36.503534', 'step': 2844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.540268', 'step': 2844, 'epoch': 1} {'type': 'loss', 'content': 0.17516948282718658, 'timestamp': '2025-09-30 22:15:36.543416', 'step': 2845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.574928', 'step': 2845, 'epoch': 1} {'type': 'loss', 'content': 0.16221770644187927, 'timestamp': '2025-09-30 22:15:36.578836', 'step': 2846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.618208', 'step': 2846, 'epoch': 1} {'type': 'loss', 'content': 0.1628217250108719, 'timestamp': '2025-09-30 22:15:36.621972', 'step': 2847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.653070', 'step': 2847, 'epoch': 1} {'type': 'loss', 'content': 0.13870704174041748, 'timestamp': '2025-09-30 22:15:36.678945', 'step': 2848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.713782', 'step': 2848, 'epoch': 1} {'type': 'loss', 'content': 0.11623050272464752, 'timestamp': '2025-09-30 22:15:36.716981', 'step': 2849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.748431', 'step': 2849, 'epoch': 1} {'type': 'loss', 'content': 0.1322169005870819, 'timestamp': '2025-09-30 22:15:36.750585', 'step': 2850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:36.782882', 'step': 2850, 'epoch': 1} {'type': 'loss', 'content': 0.1260206550359726, 'timestamp': '2025-09-30 22:15:36.791262', 'step': 2851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:36.830834', 'step': 2851, 'epoch': 1} {'type': 'loss', 'content': 0.14535430073738098, 'timestamp': '2025-09-30 22:15:36.855101', 'step': 2852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:36.887406', 'step': 2852, 'epoch': 1} {'type': 'loss', 'content': 0.15387064218521118, 'timestamp': '2025-09-30 22:15:36.893857', 'step': 2853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.924882', 'step': 2853, 'epoch': 1} {'type': 'loss', 'content': 0.18901526927947998, 'timestamp': '2025-09-30 22:15:36.930955', 'step': 2854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:36.966875', 'step': 2854, 'epoch': 1} {'type': 'loss', 'content': 0.19970133900642395, 'timestamp': '2025-09-30 22:15:36.969979', 'step': 2855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.007501', 'step': 2855, 'epoch': 1} {'type': 'loss', 'content': 0.15145465731620789, 'timestamp': '2025-09-30 22:15:37.032682', 'step': 2856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:37.068310', 'step': 2856, 'epoch': 1} {'type': 'loss', 'content': 0.2493332177400589, 'timestamp': '2025-09-30 22:15:37.077612', 'step': 2857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:37.111482', 'step': 2857, 'epoch': 1} {'type': 'loss', 'content': 0.14010421931743622, 'timestamp': '2025-09-30 22:15:37.126523', 'step': 2858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:37.158075', 'step': 2858, 'epoch': 1} {'type': 'loss', 'content': 0.28255048394203186, 'timestamp': '2025-09-30 22:15:37.168895', 'step': 2859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.201327', 'step': 2859, 'epoch': 1} {'type': 'loss', 'content': 0.1969245821237564, 'timestamp': '2025-09-30 22:15:37.225614', 'step': 2860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.264150', 'step': 2860, 'epoch': 1} {'type': 'loss', 'content': 0.16203495860099792, 'timestamp': '2025-09-30 22:15:37.271108', 'step': 2861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.306044', 'step': 2861, 'epoch': 1} {'type': 'loss', 'content': 0.19236719608306885, 'timestamp': '2025-09-30 22:15:37.317500', 'step': 2862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.353660', 'step': 2862, 'epoch': 1} {'type': 'loss', 'content': 0.07278285920619965, 'timestamp': '2025-09-30 22:15:37.357005', 'step': 2863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.391760', 'step': 2863, 'epoch': 1} {'type': 'loss', 'content': 0.11711367964744568, 'timestamp': '2025-09-30 22:15:37.416418', 'step': 2864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.447901', 'step': 2864, 'epoch': 1} {'type': 'loss', 'content': 0.21569588780403137, 'timestamp': '2025-09-30 22:15:37.450351', 'step': 2865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:37.480760', 'step': 2865, 'epoch': 1} {'type': 'loss', 'content': 0.16878104209899902, 'timestamp': '2025-09-30 22:15:37.486271', 'step': 2866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.520292', 'step': 2866, 'epoch': 1} {'type': 'loss', 'content': 0.2014208287000656, 'timestamp': '2025-09-30 22:15:37.523113', 'step': 2867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.553632', 'step': 2867, 'epoch': 1} {'type': 'loss', 'content': 0.18674524128437042, 'timestamp': '2025-09-30 22:15:37.579881', 'step': 2868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:37.614748', 'step': 2868, 'epoch': 1} {'type': 'loss', 'content': 0.1929522454738617, 'timestamp': '2025-09-30 22:15:37.620338', 'step': 2869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.661408', 'step': 2869, 'epoch': 1} {'type': 'loss', 'content': 0.11780524998903275, 'timestamp': '2025-09-30 22:15:37.663716', 'step': 2870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.697076', 'step': 2870, 'epoch': 1} {'type': 'loss', 'content': 0.1551714390516281, 'timestamp': '2025-09-30 22:15:37.700334', 'step': 2871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.731439', 'step': 2871, 'epoch': 1} {'type': 'loss', 'content': 0.0998600497841835, 'timestamp': '2025-09-30 22:15:37.755820', 'step': 2872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:37.795998', 'step': 2872, 'epoch': 1} {'type': 'loss', 'content': 0.21496865153312683, 'timestamp': '2025-09-30 22:15:37.798650', 'step': 2873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.832039', 'step': 2873, 'epoch': 1} {'type': 'loss', 'content': 0.09704495966434479, 'timestamp': '2025-09-30 22:15:37.835221', 'step': 2874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:37.868268', 'step': 2874, 'epoch': 1} {'type': 'loss', 'content': 0.14026875793933868, 'timestamp': '2025-09-30 22:15:37.871843', 'step': 2875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:37.907793', 'step': 2875, 'epoch': 1} {'type': 'loss', 'content': 0.22652605175971985, 'timestamp': '2025-09-30 22:15:37.937510', 'step': 2876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:37.968654', 'step': 2876, 'epoch': 1} {'type': 'loss', 'content': 0.23067204654216766, 'timestamp': '2025-09-30 22:15:37.971597', 'step': 2877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:38.002770', 'step': 2877, 'epoch': 1} {'type': 'loss', 'content': 0.17426691949367523, 'timestamp': '2025-09-30 22:15:38.009355', 'step': 2878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:38.044496', 'step': 2878, 'epoch': 1} {'type': 'loss', 'content': 0.1614445298910141, 'timestamp': '2025-09-30 22:15:38.050171', 'step': 2879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:38.086092', 'step': 2879, 'epoch': 1} {'type': 'loss', 'content': 0.18610532581806183, 'timestamp': '2025-09-30 22:15:38.110464', 'step': 2880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:38.143468', 'step': 2880, 'epoch': 1} {'type': 'loss', 'content': 0.18937070667743683, 'timestamp': '2025-09-30 22:15:38.151106', 'step': 2881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.185026', 'step': 2881, 'epoch': 1} {'type': 'loss', 'content': 0.2187935709953308, 'timestamp': '2025-09-30 22:15:38.192470', 'step': 2882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:38.237819', 'step': 2882, 'epoch': 1} {'type': 'loss', 'content': 0.14558683335781097, 'timestamp': '2025-09-30 22:15:38.244886', 'step': 2883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:38.283897', 'step': 2883, 'epoch': 1} {'type': 'loss', 'content': 0.24459326267242432, 'timestamp': '2025-09-30 22:15:38.307748', 'step': 2884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:38.359964', 'step': 2884, 'epoch': 1} {'type': 'loss', 'content': 0.12890639901161194, 'timestamp': '2025-09-30 22:15:38.362450', 'step': 2885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.396897', 'step': 2885, 'epoch': 1} {'type': 'loss', 'content': 0.1206836923956871, 'timestamp': '2025-09-30 22:15:38.401041', 'step': 2886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.434750', 'step': 2886, 'epoch': 1} {'type': 'loss', 'content': 0.15416386723518372, 'timestamp': '2025-09-30 22:15:38.437595', 'step': 2887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:38.477713', 'step': 2887, 'epoch': 1} {'type': 'loss', 'content': 0.11849914491176605, 'timestamp': '2025-09-30 22:15:38.501725', 'step': 2888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:38.535892', 'step': 2888, 'epoch': 1} {'type': 'loss', 'content': 0.18527163565158844, 'timestamp': '2025-09-30 22:15:38.543021', 'step': 2889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:38.586465', 'step': 2889, 'epoch': 1} {'type': 'loss', 'content': 0.14543871581554413, 'timestamp': '2025-09-30 22:15:38.589232', 'step': 2890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:38.620334', 'step': 2890, 'epoch': 1} {'type': 'loss', 'content': 0.10438080877065659, 'timestamp': '2025-09-30 22:15:38.623014', 'step': 2891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:38.655333', 'step': 2891, 'epoch': 1} {'type': 'loss', 'content': 0.12085586786270142, 'timestamp': '2025-09-30 22:15:38.686133', 'step': 2892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:38.718996', 'step': 2892, 'epoch': 1} {'type': 'loss', 'content': 0.11769834160804749, 'timestamp': '2025-09-30 22:15:38.728480', 'step': 2893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.767164', 'step': 2893, 'epoch': 1} {'type': 'loss', 'content': 0.16065733134746552, 'timestamp': '2025-09-30 22:15:38.769916', 'step': 2894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.803267', 'step': 2894, 'epoch': 1} {'type': 'loss', 'content': 0.10869129002094269, 'timestamp': '2025-09-30 22:15:38.805938', 'step': 2895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:15:38.842980', 'step': 2895, 'epoch': 1} {'type': 'loss', 'content': 0.18677741289138794, 'timestamp': '2025-09-30 22:15:38.870847', 'step': 2896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.906375', 'step': 2896, 'epoch': 1} {'type': 'loss', 'content': 0.05882951244711876, 'timestamp': '2025-09-30 22:15:38.917706', 'step': 2897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.948551', 'step': 2897, 'epoch': 1} {'type': 'loss', 'content': 0.17167352139949799, 'timestamp': '2025-09-30 22:15:38.952310', 'step': 2898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:38.983629', 'step': 2898, 'epoch': 1} {'type': 'loss', 'content': 0.220789834856987, 'timestamp': '2025-09-30 22:15:38.990482', 'step': 2899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.021708', 'step': 2899, 'epoch': 1} {'type': 'loss', 'content': 0.16752558946609497, 'timestamp': '2025-09-30 22:15:39.056505', 'step': 2900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.091481', 'step': 2900, 'epoch': 1} {'type': 'loss', 'content': 0.13750244677066803, 'timestamp': '2025-09-30 22:15:39.094365', 'step': 2901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.126754', 'step': 2901, 'epoch': 1} {'type': 'loss', 'content': 0.140238419175148, 'timestamp': '2025-09-30 22:15:39.129321', 'step': 2902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.174171', 'step': 2902, 'epoch': 1} {'type': 'loss', 'content': 0.2204626500606537, 'timestamp': '2025-09-30 22:15:39.178575', 'step': 2903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.210064', 'step': 2903, 'epoch': 1} {'type': 'loss', 'content': 0.09474632143974304, 'timestamp': '2025-09-30 22:15:39.233886', 'step': 2904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:39.266887', 'step': 2904, 'epoch': 1} {'type': 'loss', 'content': 0.1962689757347107, 'timestamp': '2025-09-30 22:15:39.271533', 'step': 2905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.304270', 'step': 2905, 'epoch': 1} {'type': 'loss', 'content': 0.17784444987773895, 'timestamp': '2025-09-30 22:15:39.309243', 'step': 2906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.347879', 'step': 2906, 'epoch': 1} {'type': 'loss', 'content': 0.11135847121477127, 'timestamp': '2025-09-30 22:15:39.352342', 'step': 2907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.385396', 'step': 2907, 'epoch': 1} {'type': 'loss', 'content': 0.1972513645887375, 'timestamp': '2025-09-30 22:15:39.409923', 'step': 2908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.440624', 'step': 2908, 'epoch': 1} {'type': 'loss', 'content': 0.204132080078125, 'timestamp': '2025-09-30 22:15:39.443994', 'step': 2909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.480678', 'step': 2909, 'epoch': 1} {'type': 'loss', 'content': 0.13325172662734985, 'timestamp': '2025-09-30 22:15:39.483705', 'step': 2910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.519012', 'step': 2910, 'epoch': 1} {'type': 'loss', 'content': 0.16075550019741058, 'timestamp': '2025-09-30 22:15:39.526451', 'step': 2911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:39.561182', 'step': 2911, 'epoch': 1} {'type': 'loss', 'content': 0.20340722799301147, 'timestamp': '2025-09-30 22:15:39.588407', 'step': 2912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.623372', 'step': 2912, 'epoch': 1} {'type': 'loss', 'content': 0.15166884660720825, 'timestamp': '2025-09-30 22:15:39.625652', 'step': 2913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.658904', 'step': 2913, 'epoch': 1} {'type': 'loss', 'content': 0.19048160314559937, 'timestamp': '2025-09-30 22:15:39.663692', 'step': 2914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.697037', 'step': 2914, 'epoch': 1} {'type': 'loss', 'content': 0.16122184693813324, 'timestamp': '2025-09-30 22:15:39.700441', 'step': 2915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:39.736089', 'step': 2915, 'epoch': 1} {'type': 'loss', 'content': 0.17184431850910187, 'timestamp': '2025-09-30 22:15:39.761797', 'step': 2916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.793063', 'step': 2916, 'epoch': 1} {'type': 'loss', 'content': 0.12738464772701263, 'timestamp': '2025-09-30 22:15:39.796402', 'step': 2917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:39.831100', 'step': 2917, 'epoch': 1} {'type': 'loss', 'content': 0.19876928627490997, 'timestamp': '2025-09-30 22:15:39.838816', 'step': 2918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:39.885843', 'step': 2918, 'epoch': 1} {'type': 'loss', 'content': 0.1777452528476715, 'timestamp': '2025-09-30 22:15:39.892490', 'step': 2919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:39.934489', 'step': 2919, 'epoch': 1} {'type': 'loss', 'content': 0.21192839741706848, 'timestamp': '2025-09-30 22:15:39.963327', 'step': 2920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:39.994085', 'step': 2920, 'epoch': 1} {'type': 'loss', 'content': 0.1761917918920517, 'timestamp': '2025-09-30 22:15:39.997490', 'step': 2921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.031914', 'step': 2921, 'epoch': 1} {'type': 'loss', 'content': 0.15530280768871307, 'timestamp': '2025-09-30 22:15:40.034482', 'step': 2922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:40.068681', 'step': 2922, 'epoch': 1} {'type': 'loss', 'content': 0.1930026412010193, 'timestamp': '2025-09-30 22:15:40.071482', 'step': 2923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:40.104002', 'step': 2923, 'epoch': 1} {'type': 'loss', 'content': 0.18983440101146698, 'timestamp': '2025-09-30 22:15:40.130940', 'step': 2924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:40.163723', 'step': 2924, 'epoch': 1} {'type': 'loss', 'content': 0.22614122927188873, 'timestamp': '2025-09-30 22:15:40.168543', 'step': 2925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:40.201228', 'step': 2925, 'epoch': 1} {'type': 'loss', 'content': 0.18565203249454498, 'timestamp': '2025-09-30 22:15:40.206121', 'step': 2926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.238492', 'step': 2926, 'epoch': 1} {'type': 'loss', 'content': 0.09572756290435791, 'timestamp': '2025-09-30 22:15:40.242054', 'step': 2927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.274173', 'step': 2927, 'epoch': 1} {'type': 'loss', 'content': 0.1723296195268631, 'timestamp': '2025-09-30 22:15:40.299357', 'step': 2928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.332051', 'step': 2928, 'epoch': 1} {'type': 'loss', 'content': 0.2202076017856598, 'timestamp': '2025-09-30 22:15:40.334892', 'step': 2929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.365156', 'step': 2929, 'epoch': 1} {'type': 'loss', 'content': 0.1730767786502838, 'timestamp': '2025-09-30 22:15:40.369855', 'step': 2930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.400820', 'step': 2930, 'epoch': 1} {'type': 'loss', 'content': 0.15397076308727264, 'timestamp': '2025-09-30 22:15:40.403975', 'step': 2931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.436523', 'step': 2931, 'epoch': 1} {'type': 'loss', 'content': 0.20553703606128693, 'timestamp': '2025-09-30 22:15:40.463202', 'step': 2932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.498380', 'step': 2932, 'epoch': 1} {'type': 'loss', 'content': 0.1747056543827057, 'timestamp': '2025-09-30 22:15:40.502723', 'step': 2933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.536948', 'step': 2933, 'epoch': 1} {'type': 'loss', 'content': 0.24988803267478943, 'timestamp': '2025-09-30 22:15:40.541724', 'step': 2934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.574221', 'step': 2934, 'epoch': 1} {'type': 'loss', 'content': 0.142011821269989, 'timestamp': '2025-09-30 22:15:40.577021', 'step': 2935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:15:40.610180', 'step': 2935, 'epoch': 1} {'type': 'loss', 'content': 0.2217499017715454, 'timestamp': '2025-09-30 22:15:40.635866', 'step': 2936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:40.666557', 'step': 2936, 'epoch': 1} {'type': 'loss', 'content': 0.19264118373394012, 'timestamp': '2025-09-30 22:15:40.671870', 'step': 2937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:40.705995', 'step': 2937, 'epoch': 1} {'type': 'loss', 'content': 0.23233874142169952, 'timestamp': '2025-09-30 22:15:40.708580', 'step': 2938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.745380', 'step': 2938, 'epoch': 1} {'type': 'loss', 'content': 0.18917237222194672, 'timestamp': '2025-09-30 22:15:40.750425', 'step': 2939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.783325', 'step': 2939, 'epoch': 1} {'type': 'loss', 'content': 0.24072732031345367, 'timestamp': '2025-09-30 22:15:40.814952', 'step': 2940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.848012', 'step': 2940, 'epoch': 1} {'type': 'loss', 'content': 0.19310548901557922, 'timestamp': '2025-09-30 22:15:40.852559', 'step': 2941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.886831', 'step': 2941, 'epoch': 1} {'type': 'loss', 'content': 0.1195661723613739, 'timestamp': '2025-09-30 22:15:40.890940', 'step': 2942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:40.923672', 'step': 2942, 'epoch': 1} {'type': 'loss', 'content': 0.14170148968696594, 'timestamp': '2025-09-30 22:15:40.928519', 'step': 2943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:40.965273', 'step': 2943, 'epoch': 1} {'type': 'loss', 'content': 0.08986964076757431, 'timestamp': '2025-09-30 22:15:40.989918', 'step': 2944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.021262', 'step': 2944, 'epoch': 1} {'type': 'loss', 'content': 0.1403561681509018, 'timestamp': '2025-09-30 22:15:41.024830', 'step': 2945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:41.059070', 'step': 2945, 'epoch': 1} {'type': 'loss', 'content': 0.14243847131729126, 'timestamp': '2025-09-30 22:15:41.063603', 'step': 2946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:41.100914', 'step': 2946, 'epoch': 1} {'type': 'loss', 'content': 0.17008697986602783, 'timestamp': '2025-09-30 22:15:41.106320', 'step': 2947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.138633', 'step': 2947, 'epoch': 1} {'type': 'loss', 'content': 0.14259479939937592, 'timestamp': '2025-09-30 22:15:41.162564', 'step': 2948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:41.193844', 'step': 2948, 'epoch': 1} {'type': 'loss', 'content': 0.08415690064430237, 'timestamp': '2025-09-30 22:15:41.200787', 'step': 2949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.233471', 'step': 2949, 'epoch': 1} {'type': 'loss', 'content': 0.12501788139343262, 'timestamp': '2025-09-30 22:15:41.237342', 'step': 2950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.273740', 'step': 2950, 'epoch': 1} {'type': 'loss', 'content': 0.18211345374584198, 'timestamp': '2025-09-30 22:15:41.276552', 'step': 2951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:41.308594', 'step': 2951, 'epoch': 1} {'type': 'loss', 'content': 0.21834498643875122, 'timestamp': '2025-09-30 22:15:41.333328', 'step': 2952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.364035', 'step': 2952, 'epoch': 1} {'type': 'loss', 'content': 0.18331973254680634, 'timestamp': '2025-09-30 22:15:41.368775', 'step': 2953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.400499', 'step': 2953, 'epoch': 1} {'type': 'loss', 'content': 0.15384450554847717, 'timestamp': '2025-09-30 22:15:41.403149', 'step': 2954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:41.434229', 'step': 2954, 'epoch': 1} {'type': 'loss', 'content': 0.18756550550460815, 'timestamp': '2025-09-30 22:15:41.439774', 'step': 2955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:41.471160', 'step': 2955, 'epoch': 1} {'type': 'loss', 'content': 0.11081746965646744, 'timestamp': '2025-09-30 22:15:41.497278', 'step': 2956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.530332', 'step': 2956, 'epoch': 1} {'type': 'loss', 'content': 0.19181056320667267, 'timestamp': '2025-09-30 22:15:41.532994', 'step': 2957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:41.564755', 'step': 2957, 'epoch': 1} {'type': 'loss', 'content': 0.242806077003479, 'timestamp': '2025-09-30 22:15:41.569901', 'step': 2958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.602996', 'step': 2958, 'epoch': 1} {'type': 'loss', 'content': 0.1963772028684616, 'timestamp': '2025-09-30 22:15:41.606917', 'step': 2959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:41.637526', 'step': 2959, 'epoch': 1} {'type': 'loss', 'content': 0.18751360476016998, 'timestamp': '2025-09-30 22:15:41.661478', 'step': 2960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:41.693089', 'step': 2960, 'epoch': 1} {'type': 'loss', 'content': 0.17149528861045837, 'timestamp': '2025-09-30 22:15:41.697294', 'step': 2961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:41.730786', 'step': 2961, 'epoch': 1} {'type': 'loss', 'content': 0.17895303666591644, 'timestamp': '2025-09-30 22:15:41.735264', 'step': 2962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:41.768279', 'step': 2962, 'epoch': 1} {'type': 'loss', 'content': 0.1652018427848816, 'timestamp': '2025-09-30 22:15:41.772629', 'step': 2963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.805160', 'step': 2963, 'epoch': 1} {'type': 'loss', 'content': 0.14272639155387878, 'timestamp': '2025-09-30 22:15:41.829124', 'step': 2964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.859799', 'step': 2964, 'epoch': 1} {'type': 'loss', 'content': 0.1835591048002243, 'timestamp': '2025-09-30 22:15:41.867553', 'step': 2965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:41.908165', 'step': 2965, 'epoch': 1} {'type': 'loss', 'content': 0.2197882980108261, 'timestamp': '2025-09-30 22:15:41.912146', 'step': 2966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:41.947918', 'step': 2966, 'epoch': 1} {'type': 'loss', 'content': 0.14831382036209106, 'timestamp': '2025-09-30 22:15:41.951007', 'step': 2967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:41.988217', 'step': 2967, 'epoch': 1} {'type': 'loss', 'content': 0.11464893817901611, 'timestamp': '2025-09-30 22:15:42.013231', 'step': 2968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:42.044298', 'step': 2968, 'epoch': 1} {'type': 'loss', 'content': 0.14306804537773132, 'timestamp': '2025-09-30 22:15:42.049280', 'step': 2969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:42.082632', 'step': 2969, 'epoch': 1} {'type': 'loss', 'content': 0.12879352271556854, 'timestamp': '2025-09-30 22:15:42.085343', 'step': 2970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:42.117163', 'step': 2970, 'epoch': 1} {'type': 'loss', 'content': 0.23418055474758148, 'timestamp': '2025-09-30 22:15:42.120005', 'step': 2971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:42.152165', 'step': 2971, 'epoch': 1} {'type': 'loss', 'content': 0.171965554356575, 'timestamp': '2025-09-30 22:15:42.182073', 'step': 2972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:42.236686', 'step': 2972, 'epoch': 1} {'type': 'loss', 'content': 0.15090513229370117, 'timestamp': '2025-09-30 22:15:42.254943', 'step': 2973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:42.333328', 'step': 2973, 'epoch': 1} {'type': 'loss', 'content': 0.1324121206998825, 'timestamp': '2025-09-30 22:15:42.371469', 'step': 2974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:42.415669', 'step': 2974, 'epoch': 1} {'type': 'loss', 'content': 0.17029143869876862, 'timestamp': '2025-09-30 22:15:42.434269', 'step': 2975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:42.474718', 'step': 2975, 'epoch': 1} {'type': 'loss', 'content': 0.11777080595493317, 'timestamp': '2025-09-30 22:15:42.517064', 'step': 2976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:15:42.562437', 'step': 2976, 'epoch': 1} {'type': 'loss', 'content': 0.18658386170864105, 'timestamp': '2025-09-30 22:15:42.574855', 'step': 2977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:42.615721', 'step': 2977, 'epoch': 1} {'type': 'loss', 'content': 0.12000640481710434, 'timestamp': '2025-09-30 22:15:42.630902', 'step': 2978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:42.665800', 'step': 2978, 'epoch': 1} {'type': 'loss', 'content': 0.09592565894126892, 'timestamp': '2025-09-30 22:15:42.677996', 'step': 2979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:42.730357', 'step': 2979, 'epoch': 1} {'type': 'loss', 'content': 0.20390504598617554, 'timestamp': '2025-09-30 22:15:42.759980', 'step': 2980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:42.797372', 'step': 2980, 'epoch': 1} {'type': 'loss', 'content': 0.11420772224664688, 'timestamp': '2025-09-30 22:15:42.816835', 'step': 2981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:42.852085', 'step': 2981, 'epoch': 1} {'type': 'loss', 'content': 0.18087509274482727, 'timestamp': '2025-09-30 22:15:42.859244', 'step': 2982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:42.905976', 'step': 2982, 'epoch': 1} {'type': 'loss', 'content': 0.14665986597537994, 'timestamp': '2025-09-30 22:15:42.942346', 'step': 2983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:42.994324', 'step': 2983, 'epoch': 1} {'type': 'loss', 'content': 0.25173306465148926, 'timestamp': '2025-09-30 22:15:43.025015', 'step': 2984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:43.078570', 'step': 2984, 'epoch': 1} {'type': 'loss', 'content': 0.11590031534433365, 'timestamp': '2025-09-30 22:15:43.095308', 'step': 2985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:43.154760', 'step': 2985, 'epoch': 1} {'type': 'loss', 'content': 0.17365765571594238, 'timestamp': '2025-09-30 22:15:43.164996', 'step': 2986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:43.214332', 'step': 2986, 'epoch': 1} {'type': 'loss', 'content': 0.08366695791482925, 'timestamp': '2025-09-30 22:15:43.224895', 'step': 2987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:43.267730', 'step': 2987, 'epoch': 1} {'type': 'loss', 'content': 0.1943134069442749, 'timestamp': '2025-09-30 22:15:43.298140', 'step': 2988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:43.356604', 'step': 2988, 'epoch': 1} {'type': 'loss', 'content': 0.2529285252094269, 'timestamp': '2025-09-30 22:15:43.372315', 'step': 2989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:43.408376', 'step': 2989, 'epoch': 1} {'type': 'loss', 'content': 0.17335543036460876, 'timestamp': '2025-09-30 22:15:43.424918', 'step': 2990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:43.461832', 'step': 2990, 'epoch': 1} {'type': 'loss', 'content': 0.14274048805236816, 'timestamp': '2025-09-30 22:15:43.488315', 'step': 2991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:43.557609', 'step': 2991, 'epoch': 1} {'type': 'loss', 'content': 0.22470548748970032, 'timestamp': '2025-09-30 22:15:43.589900', 'step': 2992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:43.623652', 'step': 2992, 'epoch': 1} {'type': 'loss', 'content': 0.2768765091896057, 'timestamp': '2025-09-30 22:15:43.626299', 'step': 2993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:43.658158', 'step': 2993, 'epoch': 1} {'type': 'loss', 'content': 0.21598541736602783, 'timestamp': '2025-09-30 22:15:43.661133', 'step': 2994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:43.693058', 'step': 2994, 'epoch': 1} {'type': 'loss', 'content': 0.21435490250587463, 'timestamp': '2025-09-30 22:15:43.695798', 'step': 2995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:43.726983', 'step': 2995, 'epoch': 1} {'type': 'loss', 'content': 0.16571448743343353, 'timestamp': '2025-09-30 22:15:43.753113', 'step': 2996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:43.785398', 'step': 2996, 'epoch': 1} {'type': 'loss', 'content': 0.1530786007642746, 'timestamp': '2025-09-30 22:15:43.788980', 'step': 2997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:43.821964', 'step': 2997, 'epoch': 1} {'type': 'loss', 'content': 0.1551464945077896, 'timestamp': '2025-09-30 22:15:43.826623', 'step': 2998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:43.859459', 'step': 2998, 'epoch': 1} {'type': 'loss', 'content': 0.13179931044578552, 'timestamp': '2025-09-30 22:15:43.863758', 'step': 2999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:43.897181', 'step': 2999, 'epoch': 1} {'type': 'loss', 'content': 0.13727758824825287, 'timestamp': '2025-09-30 22:15:43.921284', 'step': 3000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3000', 'timestamp': '2025-09-30 22:15:48.863702', 'step': 3000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:48.906833', 'step': 3000, 'epoch': 1} {'type': 'loss', 'content': 0.10605333000421524, 'timestamp': '2025-09-30 22:15:48.912062', 'step': 3001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:48.947403', 'step': 3001, 'epoch': 1} {'type': 'loss', 'content': 0.16558392345905304, 'timestamp': '2025-09-30 22:15:48.952105', 'step': 3002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:48.986125', 'step': 3002, 'epoch': 1} {'type': 'loss', 'content': 0.14391513168811798, 'timestamp': '2025-09-30 22:15:48.990728', 'step': 3003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.021822', 'step': 3003, 'epoch': 1} {'type': 'loss', 'content': 0.13067173957824707, 'timestamp': '2025-09-30 22:15:49.046033', 'step': 3004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:49.082975', 'step': 3004, 'epoch': 1} {'type': 'loss', 'content': 0.09349789470434189, 'timestamp': '2025-09-30 22:15:49.096986', 'step': 3005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.135259', 'step': 3005, 'epoch': 1} {'type': 'loss', 'content': 0.16137759387493134, 'timestamp': '2025-09-30 22:15:49.138515', 'step': 3006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:49.174322', 'step': 3006, 'epoch': 1} {'type': 'loss', 'content': 0.19476209580898285, 'timestamp': '2025-09-30 22:15:49.180120', 'step': 3007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.213794', 'step': 3007, 'epoch': 1} {'type': 'loss', 'content': 0.0747588649392128, 'timestamp': '2025-09-30 22:15:49.243980', 'step': 3008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:49.280790', 'step': 3008, 'epoch': 1} {'type': 'loss', 'content': 0.2000003159046173, 'timestamp': '2025-09-30 22:15:49.287377', 'step': 3009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:49.322298', 'step': 3009, 'epoch': 1} {'type': 'loss', 'content': 0.2651744484901428, 'timestamp': '2025-09-30 22:15:49.328117', 'step': 3010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:49.367060', 'step': 3010, 'epoch': 1} {'type': 'loss', 'content': 0.14368508756160736, 'timestamp': '2025-09-30 22:15:49.371459', 'step': 3011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.404559', 'step': 3011, 'epoch': 1} {'type': 'loss', 'content': 0.0761902704834938, 'timestamp': '2025-09-30 22:15:49.429987', 'step': 3012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.465289', 'step': 3012, 'epoch': 1} {'type': 'loss', 'content': 0.15443947911262512, 'timestamp': '2025-09-30 22:15:49.470987', 'step': 3013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:49.502093', 'step': 3013, 'epoch': 1} {'type': 'loss', 'content': 0.11592181026935577, 'timestamp': '2025-09-30 22:15:49.504254', 'step': 3014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.538006', 'step': 3014, 'epoch': 1} {'type': 'loss', 'content': 0.13662415742874146, 'timestamp': '2025-09-30 22:15:49.545576', 'step': 3015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.580840', 'step': 3015, 'epoch': 1} {'type': 'loss', 'content': 0.23815307021141052, 'timestamp': '2025-09-30 22:15:49.607381', 'step': 3016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:49.638010', 'step': 3016, 'epoch': 1} {'type': 'loss', 'content': 0.18981419503688812, 'timestamp': '2025-09-30 22:15:49.644232', 'step': 3017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:49.679719', 'step': 3017, 'epoch': 1} {'type': 'loss', 'content': 0.2108801305294037, 'timestamp': '2025-09-30 22:15:49.684613', 'step': 3018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.715482', 'step': 3018, 'epoch': 1} {'type': 'loss', 'content': 0.10311286896467209, 'timestamp': '2025-09-30 22:15:49.729348', 'step': 3019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:49.775096', 'step': 3019, 'epoch': 1} {'type': 'loss', 'content': 0.1545286625623703, 'timestamp': '2025-09-30 22:15:49.801649', 'step': 3020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.833015', 'step': 3020, 'epoch': 1} {'type': 'loss', 'content': 0.15243712067604065, 'timestamp': '2025-09-30 22:15:49.837226', 'step': 3021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:49.876598', 'step': 3021, 'epoch': 1} {'type': 'loss', 'content': 0.13193942606449127, 'timestamp': '2025-09-30 22:15:49.884527', 'step': 3022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:49.918781', 'step': 3022, 'epoch': 1} {'type': 'loss', 'content': 0.21156325936317444, 'timestamp': '2025-09-30 22:15:49.922222', 'step': 3023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:49.959862', 'step': 3023, 'epoch': 1} {'type': 'loss', 'content': 0.20723243057727814, 'timestamp': '2025-09-30 22:15:49.985810', 'step': 3024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:50.022125', 'step': 3024, 'epoch': 1} {'type': 'loss', 'content': 0.18490035831928253, 'timestamp': '2025-09-30 22:15:50.035706', 'step': 3025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.067606', 'step': 3025, 'epoch': 1} {'type': 'loss', 'content': 0.08915219455957413, 'timestamp': '2025-09-30 22:15:50.070997', 'step': 3026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:50.106895', 'step': 3026, 'epoch': 1} {'type': 'loss', 'content': 0.20879660546779633, 'timestamp': '2025-09-30 22:15:50.112575', 'step': 3027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:50.159097', 'step': 3027, 'epoch': 1} {'type': 'loss', 'content': 0.22079193592071533, 'timestamp': '2025-09-30 22:15:50.183939', 'step': 3028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.224327', 'step': 3028, 'epoch': 1} {'type': 'loss', 'content': 0.11184252798557281, 'timestamp': '2025-09-30 22:15:50.230943', 'step': 3029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.265063', 'step': 3029, 'epoch': 1} {'type': 'loss', 'content': 0.20067796111106873, 'timestamp': '2025-09-30 22:15:50.277520', 'step': 3030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.319652', 'step': 3030, 'epoch': 1} {'type': 'loss', 'content': 0.1574641913175583, 'timestamp': '2025-09-30 22:15:50.345118', 'step': 3031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.409420', 'step': 3031, 'epoch': 1} {'type': 'loss', 'content': 0.1460348218679428, 'timestamp': '2025-09-30 22:15:50.434760', 'step': 3032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:50.466307', 'step': 3032, 'epoch': 1} {'type': 'loss', 'content': 0.17556990683078766, 'timestamp': '2025-09-30 22:15:50.469162', 'step': 3033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.512906', 'step': 3033, 'epoch': 1} {'type': 'loss', 'content': 0.1784115582704544, 'timestamp': '2025-09-30 22:15:50.515806', 'step': 3034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:50.549490', 'step': 3034, 'epoch': 1} {'type': 'loss', 'content': 0.14029881358146667, 'timestamp': '2025-09-30 22:15:50.561275', 'step': 3035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:50.597155', 'step': 3035, 'epoch': 1} {'type': 'loss', 'content': 0.19482006132602692, 'timestamp': '2025-09-30 22:15:50.621845', 'step': 3036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:50.657329', 'step': 3036, 'epoch': 1} {'type': 'loss', 'content': 0.13896650075912476, 'timestamp': '2025-09-30 22:15:50.665472', 'step': 3037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:50.703958', 'step': 3037, 'epoch': 1} {'type': 'loss', 'content': 0.24770976603031158, 'timestamp': '2025-09-30 22:15:50.708404', 'step': 3038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.739962', 'step': 3038, 'epoch': 1} {'type': 'loss', 'content': 0.1261054426431656, 'timestamp': '2025-09-30 22:15:50.742462', 'step': 3039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:50.777272', 'step': 3039, 'epoch': 1} {'type': 'loss', 'content': 0.23935820162296295, 'timestamp': '2025-09-30 22:15:50.800929', 'step': 3040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:50.835261', 'step': 3040, 'epoch': 1} {'type': 'loss', 'content': 0.09343470633029938, 'timestamp': '2025-09-30 22:15:50.838501', 'step': 3041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.871544', 'step': 3041, 'epoch': 1} {'type': 'loss', 'content': 0.15212731063365936, 'timestamp': '2025-09-30 22:15:50.875440', 'step': 3042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:50.914557', 'step': 3042, 'epoch': 1} {'type': 'loss', 'content': 0.19828712940216064, 'timestamp': '2025-09-30 22:15:50.918900', 'step': 3043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:50.952322', 'step': 3043, 'epoch': 1} {'type': 'loss', 'content': 0.2108733057975769, 'timestamp': '2025-09-30 22:15:50.978847', 'step': 3044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:51.009680', 'step': 3044, 'epoch': 1} {'type': 'loss', 'content': 0.2607403099536896, 'timestamp': '2025-09-30 22:15:51.014245', 'step': 3045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.067336', 'step': 3045, 'epoch': 1} {'type': 'loss', 'content': 0.11493377387523651, 'timestamp': '2025-09-30 22:15:51.070324', 'step': 3046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:51.101382', 'step': 3046, 'epoch': 1} {'type': 'loss', 'content': 0.24723409116268158, 'timestamp': '2025-09-30 22:15:51.105619', 'step': 3047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:51.138065', 'step': 3047, 'epoch': 1} {'type': 'loss', 'content': 0.12934215366840363, 'timestamp': '2025-09-30 22:15:51.161757', 'step': 3048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:51.193411', 'step': 3048, 'epoch': 1} {'type': 'loss', 'content': 0.13315728306770325, 'timestamp': '2025-09-30 22:15:51.200772', 'step': 3049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.231521', 'step': 3049, 'epoch': 1} {'type': 'loss', 'content': 0.10113634169101715, 'timestamp': '2025-09-30 22:15:51.237145', 'step': 3050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.269887', 'step': 3050, 'epoch': 1} {'type': 'loss', 'content': 0.1753852814435959, 'timestamp': '2025-09-30 22:15:51.275524', 'step': 3051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.308664', 'step': 3051, 'epoch': 1} {'type': 'loss', 'content': 0.17867761850357056, 'timestamp': '2025-09-30 22:15:51.333416', 'step': 3052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.364344', 'step': 3052, 'epoch': 1} {'type': 'loss', 'content': 0.22044022381305695, 'timestamp': '2025-09-30 22:15:51.367909', 'step': 3053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:51.400272', 'step': 3053, 'epoch': 1} {'type': 'loss', 'content': 0.19828428328037262, 'timestamp': '2025-09-30 22:15:51.402979', 'step': 3054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.435262', 'step': 3054, 'epoch': 1} {'type': 'loss', 'content': 0.12949734926223755, 'timestamp': '2025-09-30 22:15:51.438162', 'step': 3055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.470329', 'step': 3055, 'epoch': 1} {'type': 'loss', 'content': 0.18891541659832, 'timestamp': '2025-09-30 22:15:51.494255', 'step': 3056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.526419', 'step': 3056, 'epoch': 1} {'type': 'loss', 'content': 0.09124381840229034, 'timestamp': '2025-09-30 22:15:51.530578', 'step': 3057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.563714', 'step': 3057, 'epoch': 1} {'type': 'loss', 'content': 0.3043842017650604, 'timestamp': '2025-09-30 22:15:51.566267', 'step': 3058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:51.603440', 'step': 3058, 'epoch': 1} {'type': 'loss', 'content': 0.14117538928985596, 'timestamp': '2025-09-30 22:15:51.609539', 'step': 3059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.641938', 'step': 3059, 'epoch': 1} {'type': 'loss', 'content': 0.2654103636741638, 'timestamp': '2025-09-30 22:15:51.668525', 'step': 3060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:51.700029', 'step': 3060, 'epoch': 1} {'type': 'loss', 'content': 0.0687609389424324, 'timestamp': '2025-09-30 22:15:51.703984', 'step': 3061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.747000', 'step': 3061, 'epoch': 1} {'type': 'loss', 'content': 0.1264778971672058, 'timestamp': '2025-09-30 22:15:51.750089', 'step': 3062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.781823', 'step': 3062, 'epoch': 1} {'type': 'loss', 'content': 0.24194207787513733, 'timestamp': '2025-09-30 22:15:51.784375', 'step': 3063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:51.815695', 'step': 3063, 'epoch': 1} {'type': 'loss', 'content': 0.13645288348197937, 'timestamp': '2025-09-30 22:15:51.839539', 'step': 3064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.874305', 'step': 3064, 'epoch': 1} {'type': 'loss', 'content': 0.19433444738388062, 'timestamp': '2025-09-30 22:15:51.876717', 'step': 3065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.907888', 'step': 3065, 'epoch': 1} {'type': 'loss', 'content': 0.1045287474989891, 'timestamp': '2025-09-30 22:15:51.912929', 'step': 3066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:51.946163', 'step': 3066, 'epoch': 1} {'type': 'loss', 'content': 0.1657092571258545, 'timestamp': '2025-09-30 22:15:51.949900', 'step': 3067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:51.983082', 'step': 3067, 'epoch': 1} {'type': 'loss', 'content': 0.12636280059814453, 'timestamp': '2025-09-30 22:15:52.014030', 'step': 3068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.045442', 'step': 3068, 'epoch': 1} {'type': 'loss', 'content': 0.2512165904045105, 'timestamp': '2025-09-30 22:15:52.048544', 'step': 3069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.080196', 'step': 3069, 'epoch': 1} {'type': 'loss', 'content': 0.21818549931049347, 'timestamp': '2025-09-30 22:15:52.082619', 'step': 3070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.118927', 'step': 3070, 'epoch': 1} {'type': 'loss', 'content': 0.2150745391845703, 'timestamp': '2025-09-30 22:15:52.121578', 'step': 3071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.159313', 'step': 3071, 'epoch': 1} {'type': 'loss', 'content': 0.25532835721969604, 'timestamp': '2025-09-30 22:15:52.188960', 'step': 3072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.224923', 'step': 3072, 'epoch': 1} {'type': 'loss', 'content': 0.22065085172653198, 'timestamp': '2025-09-30 22:15:52.230833', 'step': 3073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.262089', 'step': 3073, 'epoch': 1} {'type': 'loss', 'content': 0.11816819757223129, 'timestamp': '2025-09-30 22:15:52.268647', 'step': 3074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:52.303263', 'step': 3074, 'epoch': 1} {'type': 'loss', 'content': 0.13678838312625885, 'timestamp': '2025-09-30 22:15:52.307619', 'step': 3075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.345386', 'step': 3075, 'epoch': 1} {'type': 'loss', 'content': 0.1071523055434227, 'timestamp': '2025-09-30 22:15:52.369505', 'step': 3076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:52.407405', 'step': 3076, 'epoch': 1} {'type': 'loss', 'content': 0.16222219169139862, 'timestamp': '2025-09-30 22:15:52.420553', 'step': 3077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.454403', 'step': 3077, 'epoch': 1} {'type': 'loss', 'content': 0.14840388298034668, 'timestamp': '2025-09-30 22:15:52.462380', 'step': 3078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.498635', 'step': 3078, 'epoch': 1} {'type': 'loss', 'content': 0.11977986991405487, 'timestamp': '2025-09-30 22:15:52.502405', 'step': 3079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.545479', 'step': 3079, 'epoch': 1} {'type': 'loss', 'content': 0.26660770177841187, 'timestamp': '2025-09-30 22:15:52.574382', 'step': 3080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.613313', 'step': 3080, 'epoch': 1} {'type': 'loss', 'content': 0.23014740645885468, 'timestamp': '2025-09-30 22:15:52.626690', 'step': 3081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.665565', 'step': 3081, 'epoch': 1} {'type': 'loss', 'content': 0.20630645751953125, 'timestamp': '2025-09-30 22:15:52.675182', 'step': 3082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.709156', 'step': 3082, 'epoch': 1} {'type': 'loss', 'content': 0.17509080469608307, 'timestamp': '2025-09-30 22:15:52.712226', 'step': 3083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.744048', 'step': 3083, 'epoch': 1} {'type': 'loss', 'content': 0.14807799458503723, 'timestamp': '2025-09-30 22:15:52.767979', 'step': 3084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:52.801440', 'step': 3084, 'epoch': 1} {'type': 'loss', 'content': 0.07350461184978485, 'timestamp': '2025-09-30 22:15:52.804358', 'step': 3085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:52.838321', 'step': 3085, 'epoch': 1} {'type': 'loss', 'content': 0.20887278020381927, 'timestamp': '2025-09-30 22:15:52.844667', 'step': 3086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:52.883934', 'step': 3086, 'epoch': 1} {'type': 'loss', 'content': 0.1574293076992035, 'timestamp': '2025-09-30 22:15:52.893316', 'step': 3087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:52.924076', 'step': 3087, 'epoch': 1} {'type': 'loss', 'content': 0.2236272543668747, 'timestamp': '2025-09-30 22:15:52.948631', 'step': 3088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:52.979598', 'step': 3088, 'epoch': 1} {'type': 'loss', 'content': 0.27628201246261597, 'timestamp': '2025-09-30 22:15:52.982345', 'step': 3089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:53.018734', 'step': 3089, 'epoch': 1} {'type': 'loss', 'content': 0.11014527082443237, 'timestamp': '2025-09-30 22:15:53.026210', 'step': 3090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.061532', 'step': 3090, 'epoch': 1} {'type': 'loss', 'content': 0.20645612478256226, 'timestamp': '2025-09-30 22:15:53.064331', 'step': 3091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:53.097308', 'step': 3091, 'epoch': 1} {'type': 'loss', 'content': 0.16238190233707428, 'timestamp': '2025-09-30 22:15:53.124254', 'step': 3092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.155305', 'step': 3092, 'epoch': 1} {'type': 'loss', 'content': 0.20661936700344086, 'timestamp': '2025-09-30 22:15:53.158555', 'step': 3093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.192912', 'step': 3093, 'epoch': 1} {'type': 'loss', 'content': 0.11040639132261276, 'timestamp': '2025-09-30 22:15:53.198339', 'step': 3094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.231645', 'step': 3094, 'epoch': 1} {'type': 'loss', 'content': 0.12923836708068848, 'timestamp': '2025-09-30 22:15:53.234617', 'step': 3095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.268260', 'step': 3095, 'epoch': 1} {'type': 'loss', 'content': 0.1753072887659073, 'timestamp': '2025-09-30 22:15:53.293327', 'step': 3096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.331784', 'step': 3096, 'epoch': 1} {'type': 'loss', 'content': 0.09265757352113724, 'timestamp': '2025-09-30 22:15:53.336118', 'step': 3097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.367047', 'step': 3097, 'epoch': 1} {'type': 'loss', 'content': 0.22547729313373566, 'timestamp': '2025-09-30 22:15:53.375255', 'step': 3098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.406868', 'step': 3098, 'epoch': 1} {'type': 'loss', 'content': 0.1537596732378006, 'timestamp': '2025-09-30 22:15:53.410237', 'step': 3099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:53.440985', 'step': 3099, 'epoch': 1} {'type': 'loss', 'content': 0.19748397171497345, 'timestamp': '2025-09-30 22:15:53.467241', 'step': 3100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:53.503558', 'step': 3100, 'epoch': 1} {'type': 'loss', 'content': 0.220452219247818, 'timestamp': '2025-09-30 22:15:53.510420', 'step': 3101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.542272', 'step': 3101, 'epoch': 1} {'type': 'loss', 'content': 0.10304997116327286, 'timestamp': '2025-09-30 22:15:53.553140', 'step': 3102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.583800', 'step': 3102, 'epoch': 1} {'type': 'loss', 'content': 0.20372310280799866, 'timestamp': '2025-09-30 22:15:53.586120', 'step': 3103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.620826', 'step': 3103, 'epoch': 1} {'type': 'loss', 'content': 0.10165643692016602, 'timestamp': '2025-09-30 22:15:53.645058', 'step': 3104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:53.676353', 'step': 3104, 'epoch': 1} {'type': 'loss', 'content': 0.22181300818920135, 'timestamp': '2025-09-30 22:15:53.682008', 'step': 3105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:53.717555', 'step': 3105, 'epoch': 1} {'type': 'loss', 'content': 0.16781160235404968, 'timestamp': '2025-09-30 22:15:53.721294', 'step': 3106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:53.752104', 'step': 3106, 'epoch': 1} {'type': 'loss', 'content': 0.11876536905765533, 'timestamp': '2025-09-30 22:15:53.755167', 'step': 3107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.788640', 'step': 3107, 'epoch': 1} {'type': 'loss', 'content': 0.11597216129302979, 'timestamp': '2025-09-30 22:15:53.812386', 'step': 3108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.846966', 'step': 3108, 'epoch': 1} {'type': 'loss', 'content': 0.1497543305158615, 'timestamp': '2025-09-30 22:15:53.850278', 'step': 3109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.895037', 'step': 3109, 'epoch': 1} {'type': 'loss', 'content': 0.1498112827539444, 'timestamp': '2025-09-30 22:15:53.897715', 'step': 3110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:53.928255', 'step': 3110, 'epoch': 1} {'type': 'loss', 'content': 0.12330566346645355, 'timestamp': '2025-09-30 22:15:53.932639', 'step': 3111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:53.963264', 'step': 3111, 'epoch': 1} {'type': 'loss', 'content': 0.20276716351509094, 'timestamp': '2025-09-30 22:15:53.987082', 'step': 3112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:54.021360', 'step': 3112, 'epoch': 1} {'type': 'loss', 'content': 0.127859428524971, 'timestamp': '2025-09-30 22:15:54.029175', 'step': 3113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.065561', 'step': 3113, 'epoch': 1} {'type': 'loss', 'content': 0.17221160233020782, 'timestamp': '2025-09-30 22:15:54.068426', 'step': 3114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.103878', 'step': 3114, 'epoch': 1} {'type': 'loss', 'content': 0.13767094910144806, 'timestamp': '2025-09-30 22:15:54.107873', 'step': 3115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:54.138220', 'step': 3115, 'epoch': 1} {'type': 'loss', 'content': 0.30090850591659546, 'timestamp': '2025-09-30 22:15:54.162529', 'step': 3116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.194125', 'step': 3116, 'epoch': 1} {'type': 'loss', 'content': 0.18346397578716278, 'timestamp': '2025-09-30 22:15:54.198533', 'step': 3117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:54.229400', 'step': 3117, 'epoch': 1} {'type': 'loss', 'content': 0.1244007796049118, 'timestamp': '2025-09-30 22:15:54.236541', 'step': 3118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:54.268401', 'step': 3118, 'epoch': 1} {'type': 'loss', 'content': 0.15037868916988373, 'timestamp': '2025-09-30 22:15:54.271522', 'step': 3119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:54.306746', 'step': 3119, 'epoch': 1} {'type': 'loss', 'content': 0.1166813001036644, 'timestamp': '2025-09-30 22:15:54.332172', 'step': 3120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:54.365272', 'step': 3120, 'epoch': 1} {'type': 'loss', 'content': 0.13068994879722595, 'timestamp': '2025-09-30 22:15:54.368017', 'step': 3121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:54.401377', 'step': 3121, 'epoch': 1} {'type': 'loss', 'content': 0.18746645748615265, 'timestamp': '2025-09-30 22:15:54.411465', 'step': 3122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.442742', 'step': 3122, 'epoch': 1} {'type': 'loss', 'content': 0.11621858924627304, 'timestamp': '2025-09-30 22:15:54.446307', 'step': 3123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.477434', 'step': 3123, 'epoch': 1} {'type': 'loss', 'content': 0.13381633162498474, 'timestamp': '2025-09-30 22:15:54.501533', 'step': 3124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:54.532949', 'step': 3124, 'epoch': 1} {'type': 'loss', 'content': 0.13797065615653992, 'timestamp': '2025-09-30 22:15:54.536656', 'step': 3125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:54.568747', 'step': 3125, 'epoch': 1} {'type': 'loss', 'content': 0.1751556545495987, 'timestamp': '2025-09-30 22:15:54.572403', 'step': 3126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:54.602495', 'step': 3126, 'epoch': 1} {'type': 'loss', 'content': 0.16802401840686798, 'timestamp': '2025-09-30 22:15:54.605473', 'step': 3127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:54.635586', 'step': 3127, 'epoch': 1} {'type': 'loss', 'content': 0.12642809748649597, 'timestamp': '2025-09-30 22:15:54.660972', 'step': 3128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:54.692557', 'step': 3128, 'epoch': 1} {'type': 'loss', 'content': 0.1494017243385315, 'timestamp': '2025-09-30 22:15:54.696565', 'step': 3129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.728003', 'step': 3129, 'epoch': 1} {'type': 'loss', 'content': 0.2708892822265625, 'timestamp': '2025-09-30 22:15:54.730339', 'step': 3130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.760268', 'step': 3130, 'epoch': 1} {'type': 'loss', 'content': 0.1444043219089508, 'timestamp': '2025-09-30 22:15:54.764896', 'step': 3131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:54.796577', 'step': 3131, 'epoch': 1} {'type': 'loss', 'content': 0.1297137439250946, 'timestamp': '2025-09-30 22:15:54.822061', 'step': 3132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.853589', 'step': 3132, 'epoch': 1} {'type': 'loss', 'content': 0.20933321118354797, 'timestamp': '2025-09-30 22:15:54.857833', 'step': 3133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:54.889600', 'step': 3133, 'epoch': 1} {'type': 'loss', 'content': 0.148444265127182, 'timestamp': '2025-09-30 22:15:54.893122', 'step': 3134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.942608', 'step': 3134, 'epoch': 1} {'type': 'loss', 'content': 0.201533704996109, 'timestamp': '2025-09-30 22:15:54.946449', 'step': 3135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:54.977564', 'step': 3135, 'epoch': 1} {'type': 'loss', 'content': 0.18384191393852234, 'timestamp': '2025-09-30 22:15:55.001430', 'step': 3136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:55.032938', 'step': 3136, 'epoch': 1} {'type': 'loss', 'content': 0.1623670756816864, 'timestamp': '2025-09-30 22:15:55.037392', 'step': 3137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.071053', 'step': 3137, 'epoch': 1} {'type': 'loss', 'content': 0.11476724594831467, 'timestamp': '2025-09-30 22:15:55.076016', 'step': 3138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:55.110560', 'step': 3138, 'epoch': 1} {'type': 'loss', 'content': 0.1256927251815796, 'timestamp': '2025-09-30 22:15:55.114279', 'step': 3139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.146501', 'step': 3139, 'epoch': 1} {'type': 'loss', 'content': 0.20994515717029572, 'timestamp': '2025-09-30 22:15:55.171054', 'step': 3140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:15:55.204097', 'step': 3140, 'epoch': 1} {'type': 'loss', 'content': 0.22725136578083038, 'timestamp': '2025-09-30 22:15:55.206449', 'step': 3141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.236962', 'step': 3141, 'epoch': 1} {'type': 'loss', 'content': 0.14217765629291534, 'timestamp': '2025-09-30 22:15:55.247309', 'step': 3142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:55.277868', 'step': 3142, 'epoch': 1} {'type': 'loss', 'content': 0.30512166023254395, 'timestamp': '2025-09-30 22:15:55.281982', 'step': 3143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.311812', 'step': 3143, 'epoch': 1} {'type': 'loss', 'content': 0.18340764939785004, 'timestamp': '2025-09-30 22:15:55.335510', 'step': 3144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:55.369013', 'step': 3144, 'epoch': 1} {'type': 'loss', 'content': 0.16655677556991577, 'timestamp': '2025-09-30 22:15:55.373870', 'step': 3145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.407021', 'step': 3145, 'epoch': 1} {'type': 'loss', 'content': 0.22339019179344177, 'timestamp': '2025-09-30 22:15:55.410966', 'step': 3146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.443679', 'step': 3146, 'epoch': 1} {'type': 'loss', 'content': 0.16875967383384705, 'timestamp': '2025-09-30 22:15:55.448768', 'step': 3147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.482231', 'step': 3147, 'epoch': 1} {'type': 'loss', 'content': 0.26376986503601074, 'timestamp': '2025-09-30 22:15:55.505977', 'step': 3148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.537851', 'step': 3148, 'epoch': 1} {'type': 'loss', 'content': 0.2077982872724533, 'timestamp': '2025-09-30 22:15:55.542490', 'step': 3149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.575703', 'step': 3149, 'epoch': 1} {'type': 'loss', 'content': 0.18546022474765778, 'timestamp': '2025-09-30 22:15:55.578215', 'step': 3150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.611152', 'step': 3150, 'epoch': 1} {'type': 'loss', 'content': 0.27115145325660706, 'timestamp': '2025-09-30 22:15:55.618168', 'step': 3151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.649836', 'step': 3151, 'epoch': 1} {'type': 'loss', 'content': 0.1307740956544876, 'timestamp': '2025-09-30 22:15:55.675590', 'step': 3152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.706004', 'step': 3152, 'epoch': 1} {'type': 'loss', 'content': 0.17010600864887238, 'timestamp': '2025-09-30 22:15:55.708979', 'step': 3153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.741032', 'step': 3153, 'epoch': 1} {'type': 'loss', 'content': 0.11976547539234161, 'timestamp': '2025-09-30 22:15:55.745121', 'step': 3154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.778317', 'step': 3154, 'epoch': 1} {'type': 'loss', 'content': 0.19270965456962585, 'timestamp': '2025-09-30 22:15:55.783950', 'step': 3155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.818983', 'step': 3155, 'epoch': 1} {'type': 'loss', 'content': 0.18967732787132263, 'timestamp': '2025-09-30 22:15:55.843303', 'step': 3156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:55.876419', 'step': 3156, 'epoch': 1} {'type': 'loss', 'content': 0.21061190962791443, 'timestamp': '2025-09-30 22:15:55.888580', 'step': 3157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:55.919009', 'step': 3157, 'epoch': 1} {'type': 'loss', 'content': 0.2108369767665863, 'timestamp': '2025-09-30 22:15:55.924338', 'step': 3158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:55.957696', 'step': 3158, 'epoch': 1} {'type': 'loss', 'content': 0.13680674135684967, 'timestamp': '2025-09-30 22:15:55.966717', 'step': 3159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.002870', 'step': 3159, 'epoch': 1} {'type': 'loss', 'content': 0.19682729244232178, 'timestamp': '2025-09-30 22:15:56.029752', 'step': 3160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:56.062106', 'step': 3160, 'epoch': 1} {'type': 'loss', 'content': 0.1731124371290207, 'timestamp': '2025-09-30 22:15:56.067831', 'step': 3161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.101535', 'step': 3161, 'epoch': 1} {'type': 'loss', 'content': 0.13893257081508636, 'timestamp': '2025-09-30 22:15:56.109386', 'step': 3162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:56.145254', 'step': 3162, 'epoch': 1} {'type': 'loss', 'content': 0.24293257296085358, 'timestamp': '2025-09-30 22:15:56.147715', 'step': 3163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:56.180867', 'step': 3163, 'epoch': 1} {'type': 'loss', 'content': 0.23217396438121796, 'timestamp': '2025-09-30 22:15:56.207992', 'step': 3164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:56.254324', 'step': 3164, 'epoch': 1} {'type': 'loss', 'content': 0.2935357689857483, 'timestamp': '2025-09-30 22:15:56.258003', 'step': 3165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:56.289240', 'step': 3165, 'epoch': 1} {'type': 'loss', 'content': 0.20386211574077606, 'timestamp': '2025-09-30 22:15:56.292629', 'step': 3166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.325854', 'step': 3166, 'epoch': 1} {'type': 'loss', 'content': 0.21998195350170135, 'timestamp': '2025-09-30 22:15:56.342905', 'step': 3167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:56.374758', 'step': 3167, 'epoch': 1} {'type': 'loss', 'content': 0.2214099019765854, 'timestamp': '2025-09-30 22:15:56.401479', 'step': 3168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:56.447280', 'step': 3168, 'epoch': 1} {'type': 'loss', 'content': 0.08970893919467926, 'timestamp': '2025-09-30 22:15:56.453181', 'step': 3169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.498171', 'step': 3169, 'epoch': 1} {'type': 'loss', 'content': 0.11449979245662689, 'timestamp': '2025-09-30 22:15:56.503145', 'step': 3170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.537865', 'step': 3170, 'epoch': 1} {'type': 'loss', 'content': 0.16429951786994934, 'timestamp': '2025-09-30 22:15:56.545989', 'step': 3171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:56.588674', 'step': 3171, 'epoch': 1} {'type': 'loss', 'content': 0.20613086223602295, 'timestamp': '2025-09-30 22:15:56.615298', 'step': 3172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.647159', 'step': 3172, 'epoch': 1} {'type': 'loss', 'content': 0.17444056272506714, 'timestamp': '2025-09-30 22:15:56.664638', 'step': 3173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:56.718129', 'step': 3173, 'epoch': 1} {'type': 'loss', 'content': 0.159679114818573, 'timestamp': '2025-09-30 22:15:56.722145', 'step': 3174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:56.755745', 'step': 3174, 'epoch': 1} {'type': 'loss', 'content': 0.17160145938396454, 'timestamp': '2025-09-30 22:15:56.760228', 'step': 3175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.798176', 'step': 3175, 'epoch': 1} {'type': 'loss', 'content': 0.23116788268089294, 'timestamp': '2025-09-30 22:15:56.831522', 'step': 3176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:56.862661', 'step': 3176, 'epoch': 1} {'type': 'loss', 'content': 0.1841817945241928, 'timestamp': '2025-09-30 22:15:56.869606', 'step': 3177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:56.909383', 'step': 3177, 'epoch': 1} {'type': 'loss', 'content': 0.22292087972164154, 'timestamp': '2025-09-30 22:15:56.913500', 'step': 3178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:56.944698', 'step': 3178, 'epoch': 1} {'type': 'loss', 'content': 0.18423572182655334, 'timestamp': '2025-09-30 22:15:56.948948', 'step': 3179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:56.981723', 'step': 3179, 'epoch': 1} {'type': 'loss', 'content': 0.13419978320598602, 'timestamp': '2025-09-30 22:15:57.007635', 'step': 3180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:57.045226', 'step': 3180, 'epoch': 1} {'type': 'loss', 'content': 0.15527842938899994, 'timestamp': '2025-09-30 22:15:57.050121', 'step': 3181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.082160', 'step': 3181, 'epoch': 1} {'type': 'loss', 'content': 0.1289040595293045, 'timestamp': '2025-09-30 22:15:57.088056', 'step': 3182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.121457', 'step': 3182, 'epoch': 1} {'type': 'loss', 'content': 0.15739238262176514, 'timestamp': '2025-09-30 22:15:57.126240', 'step': 3183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.159444', 'step': 3183, 'epoch': 1} {'type': 'loss', 'content': 0.16605478525161743, 'timestamp': '2025-09-30 22:15:57.186727', 'step': 3184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:57.219946', 'step': 3184, 'epoch': 1} {'type': 'loss', 'content': 0.2592211067676544, 'timestamp': '2025-09-30 22:15:57.225102', 'step': 3185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.259799', 'step': 3185, 'epoch': 1} {'type': 'loss', 'content': 0.19289663434028625, 'timestamp': '2025-09-30 22:15:57.265582', 'step': 3186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.299052', 'step': 3186, 'epoch': 1} {'type': 'loss', 'content': 0.15900592505931854, 'timestamp': '2025-09-30 22:15:57.303055', 'step': 3187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:57.336103', 'step': 3187, 'epoch': 1} {'type': 'loss', 'content': 0.22823713719844818, 'timestamp': '2025-09-30 22:15:57.361548', 'step': 3188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.392321', 'step': 3188, 'epoch': 1} {'type': 'loss', 'content': 0.14405834674835205, 'timestamp': '2025-09-30 22:15:57.401332', 'step': 3189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.440013', 'step': 3189, 'epoch': 1} {'type': 'loss', 'content': 0.11803904920816422, 'timestamp': '2025-09-30 22:15:57.444484', 'step': 3190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.477338', 'step': 3190, 'epoch': 1} {'type': 'loss', 'content': 0.13849420845508575, 'timestamp': '2025-09-30 22:15:57.485890', 'step': 3191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.516718', 'step': 3191, 'epoch': 1} {'type': 'loss', 'content': 0.12246213108301163, 'timestamp': '2025-09-30 22:15:57.541120', 'step': 3192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:57.577500', 'step': 3192, 'epoch': 1} {'type': 'loss', 'content': 0.1312175691127777, 'timestamp': '2025-09-30 22:15:57.580745', 'step': 3193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:57.613889', 'step': 3193, 'epoch': 1} {'type': 'loss', 'content': 0.20574499666690826, 'timestamp': '2025-09-30 22:15:57.617679', 'step': 3194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.651599', 'step': 3194, 'epoch': 1} {'type': 'loss', 'content': 0.15680468082427979, 'timestamp': '2025-09-30 22:15:57.667643', 'step': 3195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.698822', 'step': 3195, 'epoch': 1} {'type': 'loss', 'content': 0.1114591434597969, 'timestamp': '2025-09-30 22:15:57.725022', 'step': 3196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.771163', 'step': 3196, 'epoch': 1} {'type': 'loss', 'content': 0.08542942255735397, 'timestamp': '2025-09-30 22:15:57.777179', 'step': 3197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:57.811831', 'step': 3197, 'epoch': 1} {'type': 'loss', 'content': 0.10287517309188843, 'timestamp': '2025-09-30 22:15:57.815770', 'step': 3198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.860293', 'step': 3198, 'epoch': 1} {'type': 'loss', 'content': 0.19341309368610382, 'timestamp': '2025-09-30 22:15:57.875243', 'step': 3199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:57.921279', 'step': 3199, 'epoch': 1} {'type': 'loss', 'content': 0.11838135123252869, 'timestamp': '2025-09-30 22:15:57.947374', 'step': 3200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:57.978992', 'step': 3200, 'epoch': 1} {'type': 'loss', 'content': 0.18118928372859955, 'timestamp': '2025-09-30 22:15:57.986480', 'step': 3201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.020803', 'step': 3201, 'epoch': 1} {'type': 'loss', 'content': 0.25893503427505493, 'timestamp': '2025-09-30 22:15:58.037420', 'step': 3202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.071620', 'step': 3202, 'epoch': 1} {'type': 'loss', 'content': 0.18758068978786469, 'timestamp': '2025-09-30 22:15:58.075388', 'step': 3203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.107364', 'step': 3203, 'epoch': 1} {'type': 'loss', 'content': 0.21082428097724915, 'timestamp': '2025-09-30 22:15:58.132049', 'step': 3204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.178331', 'step': 3204, 'epoch': 1} {'type': 'loss', 'content': 0.2027026116847992, 'timestamp': '2025-09-30 22:15:58.181424', 'step': 3205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.217630', 'step': 3205, 'epoch': 1} {'type': 'loss', 'content': 0.1514265388250351, 'timestamp': '2025-09-30 22:15:58.220478', 'step': 3206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:58.251581', 'step': 3206, 'epoch': 1} {'type': 'loss', 'content': 0.1231033131480217, 'timestamp': '2025-09-30 22:15:58.258834', 'step': 3207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:58.290225', 'step': 3207, 'epoch': 1} {'type': 'loss', 'content': 0.14034035801887512, 'timestamp': '2025-09-30 22:15:58.314457', 'step': 3208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.349296', 'step': 3208, 'epoch': 1} {'type': 'loss', 'content': 0.08980566263198853, 'timestamp': '2025-09-30 22:15:58.351654', 'step': 3209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.381875', 'step': 3209, 'epoch': 1} {'type': 'loss', 'content': 0.22793646156787872, 'timestamp': '2025-09-30 22:15:58.387938', 'step': 3210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.418571', 'step': 3210, 'epoch': 1} {'type': 'loss', 'content': 0.19464750587940216, 'timestamp': '2025-09-30 22:15:58.421318', 'step': 3211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:58.453160', 'step': 3211, 'epoch': 1} {'type': 'loss', 'content': 0.13027557730674744, 'timestamp': '2025-09-30 22:15:58.477463', 'step': 3212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.508666', 'step': 3212, 'epoch': 1} {'type': 'loss', 'content': 0.1428382694721222, 'timestamp': '2025-09-30 22:15:58.514431', 'step': 3213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.552460', 'step': 3213, 'epoch': 1} {'type': 'loss', 'content': 0.1512656807899475, 'timestamp': '2025-09-30 22:15:58.564470', 'step': 3214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.597151', 'step': 3214, 'epoch': 1} {'type': 'loss', 'content': 0.17512689530849457, 'timestamp': '2025-09-30 22:15:58.600055', 'step': 3215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.635101', 'step': 3215, 'epoch': 1} {'type': 'loss', 'content': 0.1311892718076706, 'timestamp': '2025-09-30 22:15:58.662150', 'step': 3216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.694364', 'step': 3216, 'epoch': 1} {'type': 'loss', 'content': 0.23799088597297668, 'timestamp': '2025-09-30 22:15:58.699209', 'step': 3217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:58.732351', 'step': 3217, 'epoch': 1} {'type': 'loss', 'content': 0.140761137008667, 'timestamp': '2025-09-30 22:15:58.735467', 'step': 3218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:58.767018', 'step': 3218, 'epoch': 1} {'type': 'loss', 'content': 0.1543777883052826, 'timestamp': '2025-09-30 22:15:58.769672', 'step': 3219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:58.800981', 'step': 3219, 'epoch': 1} {'type': 'loss', 'content': 0.12763085961341858, 'timestamp': '2025-09-30 22:15:58.827868', 'step': 3220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:58.859471', 'step': 3220, 'epoch': 1} {'type': 'loss', 'content': 0.20584633946418762, 'timestamp': '2025-09-30 22:15:58.862630', 'step': 3221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:58.894549', 'step': 3221, 'epoch': 1} {'type': 'loss', 'content': 0.16434518992900848, 'timestamp': '2025-09-30 22:15:58.898553', 'step': 3222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:58.931077', 'step': 3222, 'epoch': 1} {'type': 'loss', 'content': 0.24599196016788483, 'timestamp': '2025-09-30 22:15:58.935876', 'step': 3223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:15:58.967934', 'step': 3223, 'epoch': 1} {'type': 'loss', 'content': 0.1466054469347, 'timestamp': '2025-09-30 22:15:58.993679', 'step': 3224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:15:59.024147', 'step': 3224, 'epoch': 1} {'type': 'loss', 'content': 0.16976714134216309, 'timestamp': '2025-09-30 22:15:59.027843', 'step': 3225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.062049', 'step': 3225, 'epoch': 1} {'type': 'loss', 'content': 0.12713706493377686, 'timestamp': '2025-09-30 22:15:59.067551', 'step': 3226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.110523', 'step': 3226, 'epoch': 1} {'type': 'loss', 'content': 0.19430431723594666, 'timestamp': '2025-09-30 22:15:59.112975', 'step': 3227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.147384', 'step': 3227, 'epoch': 1} {'type': 'loss', 'content': 0.2860325574874878, 'timestamp': '2025-09-30 22:15:59.171850', 'step': 3228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:59.214246', 'step': 3228, 'epoch': 1} {'type': 'loss', 'content': 0.10912725329399109, 'timestamp': '2025-09-30 22:15:59.220914', 'step': 3229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.253596', 'step': 3229, 'epoch': 1} {'type': 'loss', 'content': 0.16605441272258759, 'timestamp': '2025-09-30 22:15:59.256740', 'step': 3230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:15:59.290608', 'step': 3230, 'epoch': 1} {'type': 'loss', 'content': 0.13067060708999634, 'timestamp': '2025-09-30 22:15:59.296775', 'step': 3231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:59.329784', 'step': 3231, 'epoch': 1} {'type': 'loss', 'content': 0.22025085985660553, 'timestamp': '2025-09-30 22:15:59.353914', 'step': 3232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:15:59.386549', 'step': 3232, 'epoch': 1} {'type': 'loss', 'content': 0.11256074160337448, 'timestamp': '2025-09-30 22:15:59.390315', 'step': 3233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.422510', 'step': 3233, 'epoch': 1} {'type': 'loss', 'content': 0.10470494627952576, 'timestamp': '2025-09-30 22:15:59.425164', 'step': 3234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:15:59.459197', 'step': 3234, 'epoch': 1} {'type': 'loss', 'content': 0.20633363723754883, 'timestamp': '2025-09-30 22:15:59.463242', 'step': 3235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.496706', 'step': 3235, 'epoch': 1} {'type': 'loss', 'content': 0.1858215034008026, 'timestamp': '2025-09-30 22:15:59.522202', 'step': 3236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:59.554666', 'step': 3236, 'epoch': 1} {'type': 'loss', 'content': 0.22313888370990753, 'timestamp': '2025-09-30 22:15:59.557731', 'step': 3237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.588599', 'step': 3237, 'epoch': 1} {'type': 'loss', 'content': 0.19235637784004211, 'timestamp': '2025-09-30 22:15:59.590947', 'step': 3238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:15:59.639856', 'step': 3238, 'epoch': 1} {'type': 'loss', 'content': 0.12916183471679688, 'timestamp': '2025-09-30 22:15:59.642259', 'step': 3239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.673655', 'step': 3239, 'epoch': 1} {'type': 'loss', 'content': 0.18995271623134613, 'timestamp': '2025-09-30 22:15:59.697319', 'step': 3240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.740081', 'step': 3240, 'epoch': 1} {'type': 'loss', 'content': 0.15545938909053802, 'timestamp': '2025-09-30 22:15:59.744045', 'step': 3241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.776416', 'step': 3241, 'epoch': 1} {'type': 'loss', 'content': 0.23053887486457825, 'timestamp': '2025-09-30 22:15:59.782234', 'step': 3242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.814259', 'step': 3242, 'epoch': 1} {'type': 'loss', 'content': 0.14495886862277985, 'timestamp': '2025-09-30 22:15:59.818848', 'step': 3243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.851284', 'step': 3243, 'epoch': 1} {'type': 'loss', 'content': 0.23163318634033203, 'timestamp': '2025-09-30 22:15:59.876209', 'step': 3244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:15:59.907632', 'step': 3244, 'epoch': 1} {'type': 'loss', 'content': 0.08047870546579361, 'timestamp': '2025-09-30 22:15:59.911573', 'step': 3245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.944007', 'step': 3245, 'epoch': 1} {'type': 'loss', 'content': 0.21698246896266937, 'timestamp': '2025-09-30 22:15:59.947617', 'step': 3246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:15:59.979094', 'step': 3246, 'epoch': 1} {'type': 'loss', 'content': 0.09354127943515778, 'timestamp': '2025-09-30 22:15:59.982961', 'step': 3247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:00.014179', 'step': 3247, 'epoch': 1} {'type': 'loss', 'content': 0.22308598458766937, 'timestamp': '2025-09-30 22:16:00.039573', 'step': 3248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:00.070791', 'step': 3248, 'epoch': 1} {'type': 'loss', 'content': 0.24910885095596313, 'timestamp': '2025-09-30 22:16:00.075668', 'step': 3249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:00.108227', 'step': 3249, 'epoch': 1} {'type': 'loss', 'content': 0.20699289441108704, 'timestamp': '2025-09-30 22:16:00.112233', 'step': 3250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:00.142650', 'step': 3250, 'epoch': 1} {'type': 'loss', 'content': 0.1596556156873703, 'timestamp': '2025-09-30 22:16:00.148036', 'step': 3251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:00.179694', 'step': 3251, 'epoch': 1} {'type': 'loss', 'content': 0.13465367257595062, 'timestamp': '2025-09-30 22:16:00.203927', 'step': 3252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:00.236287', 'step': 3252, 'epoch': 1} {'type': 'loss', 'content': 0.12344253808259964, 'timestamp': '2025-09-30 22:16:00.238546', 'step': 3253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:00.269534', 'step': 3253, 'epoch': 1} {'type': 'loss', 'content': 0.09416799247264862, 'timestamp': '2025-09-30 22:16:00.272661', 'step': 3254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:00.307664', 'step': 3254, 'epoch': 1} {'type': 'loss', 'content': 0.12333013862371445, 'timestamp': '2025-09-30 22:16:00.310153', 'step': 3255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:00.341679', 'step': 3255, 'epoch': 1} {'type': 'loss', 'content': 0.0784565806388855, 'timestamp': '2025-09-30 22:16:00.368239', 'step': 3256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:00.403931', 'step': 3256, 'epoch': 1} {'type': 'loss', 'content': 0.1662462204694748, 'timestamp': '2025-09-30 22:16:00.407499', 'step': 3257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:00.445309', 'step': 3257, 'epoch': 1} {'type': 'loss', 'content': 0.17468447983264923, 'timestamp': '2025-09-30 22:16:00.452845', 'step': 3258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:00.497128', 'step': 3258, 'epoch': 1} {'type': 'loss', 'content': 0.2106528878211975, 'timestamp': '2025-09-30 22:16:00.499280', 'step': 3259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:00.539120', 'step': 3259, 'epoch': 1} {'type': 'loss', 'content': 0.1558314561843872, 'timestamp': '2025-09-30 22:16:00.567019', 'step': 3260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:00.604011', 'step': 3260, 'epoch': 1} {'type': 'loss', 'content': 0.19415266811847687, 'timestamp': '2025-09-30 22:16:00.608235', 'step': 3261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:00.641819', 'step': 3261, 'epoch': 1} {'type': 'loss', 'content': 0.11350904405117035, 'timestamp': '2025-09-30 22:16:00.646490', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:16:08.705782', 'step': 3262, 'epoch': 1} {'type': 'pplx', 'content': 9137.507185849287, 'timestamp': '2025-09-30 22:16:08.716687', 'step': 3262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:08.752586', 'step': 3262, 'epoch': 1} {'type': 'loss', 'content': 0.1684078723192215, 'timestamp': '2025-09-30 22:16:08.755831', 'step': 3263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:08.797386', 'step': 3263, 'epoch': 1} {'type': 'loss', 'content': 0.17123349010944366, 'timestamp': '2025-09-30 22:16:08.822620', 'step': 3264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:08.855038', 'step': 3264, 'epoch': 1} {'type': 'loss', 'content': 0.13138461112976074, 'timestamp': '2025-09-30 22:16:08.858737', 'step': 3265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:08.895446', 'step': 3265, 'epoch': 1} {'type': 'loss', 'content': 0.27280721068382263, 'timestamp': '2025-09-30 22:16:08.901421', 'step': 3266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:08.938194', 'step': 3266, 'epoch': 1} {'type': 'loss', 'content': 0.231723353266716, 'timestamp': '2025-09-30 22:16:08.943096', 'step': 3267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:08.974902', 'step': 3267, 'epoch': 1} {'type': 'loss', 'content': 0.13515406847000122, 'timestamp': '2025-09-30 22:16:08.999426', 'step': 3268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.030998', 'step': 3268, 'epoch': 1} {'type': 'loss', 'content': 0.18285061419010162, 'timestamp': '2025-09-30 22:16:09.036435', 'step': 3269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:09.068632', 'step': 3269, 'epoch': 1} {'type': 'loss', 'content': 0.17984256148338318, 'timestamp': '2025-09-30 22:16:09.071619', 'step': 3270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.104457', 'step': 3270, 'epoch': 1} {'type': 'loss', 'content': 0.14414004981517792, 'timestamp': '2025-09-30 22:16:09.107709', 'step': 3271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:09.140614', 'step': 3271, 'epoch': 1} {'type': 'loss', 'content': 0.08453980088233948, 'timestamp': '2025-09-30 22:16:09.166818', 'step': 3272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.197889', 'step': 3272, 'epoch': 1} {'type': 'loss', 'content': 0.17528335750102997, 'timestamp': '2025-09-30 22:16:09.203465', 'step': 3273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:09.239287', 'step': 3273, 'epoch': 1} {'type': 'loss', 'content': 0.1812950074672699, 'timestamp': '2025-09-30 22:16:09.243545', 'step': 3274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:09.277539', 'step': 3274, 'epoch': 1} {'type': 'loss', 'content': 0.0816454067826271, 'timestamp': '2025-09-30 22:16:09.286151', 'step': 3275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.320874', 'step': 3275, 'epoch': 1} {'type': 'loss', 'content': 0.15112487971782684, 'timestamp': '2025-09-30 22:16:09.345289', 'step': 3276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:09.396962', 'step': 3276, 'epoch': 1} {'type': 'loss', 'content': 0.1680014580488205, 'timestamp': '2025-09-30 22:16:09.399418', 'step': 3277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.430389', 'step': 3277, 'epoch': 1} {'type': 'loss', 'content': 0.09636856615543365, 'timestamp': '2025-09-30 22:16:09.433817', 'step': 3278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:09.469166', 'step': 3278, 'epoch': 1} {'type': 'loss', 'content': 0.22043205797672272, 'timestamp': '2025-09-30 22:16:09.472984', 'step': 3279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:09.505328', 'step': 3279, 'epoch': 1} {'type': 'loss', 'content': 0.1558237224817276, 'timestamp': '2025-09-30 22:16:09.538504', 'step': 3280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:09.570174', 'step': 3280, 'epoch': 1} {'type': 'loss', 'content': 0.12382235378026962, 'timestamp': '2025-09-30 22:16:09.572831', 'step': 3281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:09.610383', 'step': 3281, 'epoch': 1} {'type': 'loss', 'content': 0.06507152318954468, 'timestamp': '2025-09-30 22:16:09.619207', 'step': 3282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.657088', 'step': 3282, 'epoch': 1} {'type': 'loss', 'content': 0.2163904756307602, 'timestamp': '2025-09-30 22:16:09.660347', 'step': 3283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:09.691846', 'step': 3283, 'epoch': 1} {'type': 'loss', 'content': 0.17025785148143768, 'timestamp': '2025-09-30 22:16:09.717540', 'step': 3284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:09.749067', 'step': 3284, 'epoch': 1} {'type': 'loss', 'content': 0.24135980010032654, 'timestamp': '2025-09-30 22:16:09.755705', 'step': 3285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.788566', 'step': 3285, 'epoch': 1} {'type': 'loss', 'content': 0.16636565327644348, 'timestamp': '2025-09-30 22:16:09.794759', 'step': 3286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:09.827126', 'step': 3286, 'epoch': 1} {'type': 'loss', 'content': 0.14985564351081848, 'timestamp': '2025-09-30 22:16:09.829580', 'step': 3287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.860598', 'step': 3287, 'epoch': 1} {'type': 'loss', 'content': 0.20105235278606415, 'timestamp': '2025-09-30 22:16:09.884775', 'step': 3288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:09.916595', 'step': 3288, 'epoch': 1} {'type': 'loss', 'content': 0.17013829946517944, 'timestamp': '2025-09-30 22:16:09.924355', 'step': 3289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.955934', 'step': 3289, 'epoch': 1} {'type': 'loss', 'content': 0.2815709710121155, 'timestamp': '2025-09-30 22:16:09.967269', 'step': 3290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:09.999265', 'step': 3290, 'epoch': 1} {'type': 'loss', 'content': 0.17011238634586334, 'timestamp': '2025-09-30 22:16:10.003923', 'step': 3291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.049695', 'step': 3291, 'epoch': 1} {'type': 'loss', 'content': 0.060602009296417236, 'timestamp': '2025-09-30 22:16:10.075641', 'step': 3292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:10.105485', 'step': 3292, 'epoch': 1} {'type': 'loss', 'content': 0.26936566829681396, 'timestamp': '2025-09-30 22:16:10.109058', 'step': 3293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.140904', 'step': 3293, 'epoch': 1} {'type': 'loss', 'content': 0.19957739114761353, 'timestamp': '2025-09-30 22:16:10.147548', 'step': 3294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:10.179260', 'step': 3294, 'epoch': 1} {'type': 'loss', 'content': 0.14009785652160645, 'timestamp': '2025-09-30 22:16:10.181642', 'step': 3295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:10.214748', 'step': 3295, 'epoch': 1} {'type': 'loss', 'content': 0.12954100966453552, 'timestamp': '2025-09-30 22:16:10.241464', 'step': 3296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:10.275995', 'step': 3296, 'epoch': 1} {'type': 'loss', 'content': 0.13494326174259186, 'timestamp': '2025-09-30 22:16:10.280472', 'step': 3297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:10.314276', 'step': 3297, 'epoch': 1} {'type': 'loss', 'content': 0.15035182237625122, 'timestamp': '2025-09-30 22:16:10.316695', 'step': 3298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.347571', 'step': 3298, 'epoch': 1} {'type': 'loss', 'content': 0.10639981180429459, 'timestamp': '2025-09-30 22:16:10.352351', 'step': 3299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.387279', 'step': 3299, 'epoch': 1} {'type': 'loss', 'content': 0.16250082850456238, 'timestamp': '2025-09-30 22:16:10.412703', 'step': 3300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.445065', 'step': 3300, 'epoch': 1} {'type': 'loss', 'content': 0.12263140827417374, 'timestamp': '2025-09-30 22:16:10.447667', 'step': 3301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.478265', 'step': 3301, 'epoch': 1} {'type': 'loss', 'content': 0.2634204030036926, 'timestamp': '2025-09-30 22:16:10.482828', 'step': 3302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.515371', 'step': 3302, 'epoch': 1} {'type': 'loss', 'content': 0.20870724320411682, 'timestamp': '2025-09-30 22:16:10.520641', 'step': 3303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.554647', 'step': 3303, 'epoch': 1} {'type': 'loss', 'content': 0.12338937073945999, 'timestamp': '2025-09-30 22:16:10.578794', 'step': 3304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:10.611966', 'step': 3304, 'epoch': 1} {'type': 'loss', 'content': 0.2668788433074951, 'timestamp': '2025-09-30 22:16:10.614484', 'step': 3305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.645294', 'step': 3305, 'epoch': 1} {'type': 'loss', 'content': 0.12297815829515457, 'timestamp': '2025-09-30 22:16:10.648702', 'step': 3306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.679904', 'step': 3306, 'epoch': 1} {'type': 'loss', 'content': 0.1712741255760193, 'timestamp': '2025-09-30 22:16:10.687742', 'step': 3307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.752540', 'step': 3307, 'epoch': 1} {'type': 'loss', 'content': 0.13868829607963562, 'timestamp': '2025-09-30 22:16:10.777529', 'step': 3308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.819810', 'step': 3308, 'epoch': 1} {'type': 'loss', 'content': 0.15807364881038666, 'timestamp': '2025-09-30 22:16:10.827397', 'step': 3309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.875096', 'step': 3309, 'epoch': 1} {'type': 'loss', 'content': 0.1543089598417282, 'timestamp': '2025-09-30 22:16:10.878417', 'step': 3310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:10.911922', 'step': 3310, 'epoch': 1} {'type': 'loss', 'content': 0.19698622822761536, 'timestamp': '2025-09-30 22:16:10.914052', 'step': 3311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:10.948353', 'step': 3311, 'epoch': 1} {'type': 'loss', 'content': 0.16602693498134613, 'timestamp': '2025-09-30 22:16:10.972877', 'step': 3312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:11.005590', 'step': 3312, 'epoch': 1} {'type': 'loss', 'content': 0.14575031399726868, 'timestamp': '2025-09-30 22:16:11.008065', 'step': 3313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:11.042225', 'step': 3313, 'epoch': 1} {'type': 'loss', 'content': 0.16743043065071106, 'timestamp': '2025-09-30 22:16:11.046625', 'step': 3314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:11.084545', 'step': 3314, 'epoch': 1} {'type': 'loss', 'content': 0.1421240270137787, 'timestamp': '2025-09-30 22:16:11.089571', 'step': 3315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:11.126884', 'step': 3315, 'epoch': 1} {'type': 'loss', 'content': 0.1262393742799759, 'timestamp': '2025-09-30 22:16:11.154950', 'step': 3316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:11.197339', 'step': 3316, 'epoch': 1} {'type': 'loss', 'content': 0.13007789850234985, 'timestamp': '2025-09-30 22:16:11.202432', 'step': 3317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:11.244172', 'step': 3317, 'epoch': 1} {'type': 'loss', 'content': 0.15008515119552612, 'timestamp': '2025-09-30 22:16:11.249465', 'step': 3318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:11.289369', 'step': 3318, 'epoch': 1} {'type': 'loss', 'content': 0.18176054954528809, 'timestamp': '2025-09-30 22:16:11.297704', 'step': 3319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:11.331789', 'step': 3319, 'epoch': 1} {'type': 'loss', 'content': 0.14075340330600739, 'timestamp': '2025-09-30 22:16:11.355611', 'step': 3320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:11.392680', 'step': 3320, 'epoch': 1} {'type': 'loss', 'content': 0.21133726835250854, 'timestamp': '2025-09-30 22:16:11.395626', 'step': 3321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:11.440314', 'step': 3321, 'epoch': 1} {'type': 'loss', 'content': 0.10902988165616989, 'timestamp': '2025-09-30 22:16:11.447213', 'step': 3322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:11.481834', 'step': 3322, 'epoch': 1} {'type': 'loss', 'content': 0.08621476590633392, 'timestamp': '2025-09-30 22:16:11.486214', 'step': 3323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:11.520456', 'step': 3323, 'epoch': 1} {'type': 'loss', 'content': 0.13381054997444153, 'timestamp': '2025-09-30 22:16:11.547298', 'step': 3324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:11.588862', 'step': 3324, 'epoch': 1} {'type': 'loss', 'content': 0.2798851728439331, 'timestamp': '2025-09-30 22:16:11.592669', 'step': 3325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:11.636720', 'step': 3325, 'epoch': 1} {'type': 'loss', 'content': 0.13385869562625885, 'timestamp': '2025-09-30 22:16:11.641172', 'step': 3326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:11.680382', 'step': 3326, 'epoch': 1} {'type': 'loss', 'content': 0.20916993916034698, 'timestamp': '2025-09-30 22:16:11.686859', 'step': 3327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:16:11.724491', 'step': 3327, 'epoch': 1} {'type': 'loss', 'content': 0.21580657362937927, 'timestamp': '2025-09-30 22:16:11.750616', 'step': 3328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:11.785031', 'step': 3328, 'epoch': 1} {'type': 'loss', 'content': 0.22357797622680664, 'timestamp': '2025-09-30 22:16:11.791492', 'step': 3329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:11.852869', 'step': 3329, 'epoch': 1} {'type': 'loss', 'content': 0.1135638952255249, 'timestamp': '2025-09-30 22:16:11.855334', 'step': 3330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:11.904272', 'step': 3330, 'epoch': 1} {'type': 'loss', 'content': 0.08798428624868393, 'timestamp': '2025-09-30 22:16:11.907336', 'step': 3331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:11.955405', 'step': 3331, 'epoch': 1} {'type': 'loss', 'content': 0.19865337014198303, 'timestamp': '2025-09-30 22:16:11.981979', 'step': 3332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:12.020895', 'step': 3332, 'epoch': 1} {'type': 'loss', 'content': 0.1071227416396141, 'timestamp': '2025-09-30 22:16:12.023399', 'step': 3333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:12.056375', 'step': 3333, 'epoch': 1} {'type': 'loss', 'content': 0.22811126708984375, 'timestamp': '2025-09-30 22:16:12.061113', 'step': 3334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:12.109669', 'step': 3334, 'epoch': 1} {'type': 'loss', 'content': 0.1438247263431549, 'timestamp': '2025-09-30 22:16:12.113242', 'step': 3335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:12.158950', 'step': 3335, 'epoch': 1} {'type': 'loss', 'content': 0.17223821580410004, 'timestamp': '2025-09-30 22:16:12.182755', 'step': 3336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.214128', 'step': 3336, 'epoch': 1} {'type': 'loss', 'content': 0.11643792688846588, 'timestamp': '2025-09-30 22:16:12.219527', 'step': 3337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.254067', 'step': 3337, 'epoch': 1} {'type': 'loss', 'content': 0.12937062978744507, 'timestamp': '2025-09-30 22:16:12.257077', 'step': 3338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:12.290173', 'step': 3338, 'epoch': 1} {'type': 'loss', 'content': 0.2822760045528412, 'timestamp': '2025-09-30 22:16:12.293656', 'step': 3339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-30 22:16:12.346715', 'step': 3339, 'epoch': 1} {'type': 'loss', 'content': 0.18944893777370453, 'timestamp': '2025-09-30 22:16:12.383959', 'step': 3340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.421306', 'step': 3340, 'epoch': 1} {'type': 'loss', 'content': 0.21544009447097778, 'timestamp': '2025-09-30 22:16:12.427315', 'step': 3341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:12.457717', 'step': 3341, 'epoch': 1} {'type': 'loss', 'content': 0.2654949426651001, 'timestamp': '2025-09-30 22:16:12.460183', 'step': 3342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.492899', 'step': 3342, 'epoch': 1} {'type': 'loss', 'content': 0.16433250904083252, 'timestamp': '2025-09-30 22:16:12.496502', 'step': 3343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.526708', 'step': 3343, 'epoch': 1} {'type': 'loss', 'content': 0.23040403425693512, 'timestamp': '2025-09-30 22:16:12.551058', 'step': 3344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:12.582245', 'step': 3344, 'epoch': 1} {'type': 'loss', 'content': 0.15255582332611084, 'timestamp': '2025-09-30 22:16:12.584452', 'step': 3345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:12.618887', 'step': 3345, 'epoch': 1} {'type': 'loss', 'content': 0.1757839471101761, 'timestamp': '2025-09-30 22:16:12.623845', 'step': 3346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:12.659160', 'step': 3346, 'epoch': 1} {'type': 'loss', 'content': 0.1585429608821869, 'timestamp': '2025-09-30 22:16:12.666749', 'step': 3347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.702198', 'step': 3347, 'epoch': 1} {'type': 'loss', 'content': 0.15240153670310974, 'timestamp': '2025-09-30 22:16:12.725893', 'step': 3348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.758315', 'step': 3348, 'epoch': 1} {'type': 'loss', 'content': 0.31653666496276855, 'timestamp': '2025-09-30 22:16:12.766403', 'step': 3349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:12.796653', 'step': 3349, 'epoch': 1} {'type': 'loss', 'content': 0.19061806797981262, 'timestamp': '2025-09-30 22:16:12.799119', 'step': 3350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.832094', 'step': 3350, 'epoch': 1} {'type': 'loss', 'content': 0.11907902359962463, 'timestamp': '2025-09-30 22:16:12.834932', 'step': 3351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:12.866453', 'step': 3351, 'epoch': 1} {'type': 'loss', 'content': 0.14822714030742645, 'timestamp': '2025-09-30 22:16:12.893964', 'step': 3352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:12.925348', 'step': 3352, 'epoch': 1} {'type': 'loss', 'content': 0.15536655485630035, 'timestamp': '2025-09-30 22:16:12.931151', 'step': 3353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:12.967759', 'step': 3353, 'epoch': 1} {'type': 'loss', 'content': 0.1319723129272461, 'timestamp': '2025-09-30 22:16:12.970344', 'step': 3354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:13.000915', 'step': 3354, 'epoch': 1} {'type': 'loss', 'content': 0.23228730261325836, 'timestamp': '2025-09-30 22:16:13.003379', 'step': 3355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:13.033813', 'step': 3355, 'epoch': 1} {'type': 'loss', 'content': 0.2066374123096466, 'timestamp': '2025-09-30 22:16:13.063612', 'step': 3356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.121847', 'step': 3356, 'epoch': 1} {'type': 'loss', 'content': 0.13016405701637268, 'timestamp': '2025-09-30 22:16:13.125520', 'step': 3357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:13.156763', 'step': 3357, 'epoch': 1} {'type': 'loss', 'content': 0.13350898027420044, 'timestamp': '2025-09-30 22:16:13.165729', 'step': 3358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:13.196686', 'step': 3358, 'epoch': 1} {'type': 'loss', 'content': 0.09978123754262924, 'timestamp': '2025-09-30 22:16:13.199274', 'step': 3359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:13.229483', 'step': 3359, 'epoch': 1} {'type': 'loss', 'content': 0.19197824597358704, 'timestamp': '2025-09-30 22:16:13.254352', 'step': 3360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.306325', 'step': 3360, 'epoch': 1} {'type': 'loss', 'content': 0.16408784687519073, 'timestamp': '2025-09-30 22:16:13.308991', 'step': 3361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.342860', 'step': 3361, 'epoch': 1} {'type': 'loss', 'content': 0.16339582204818726, 'timestamp': '2025-09-30 22:16:13.347075', 'step': 3362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:13.379091', 'step': 3362, 'epoch': 1} {'type': 'loss', 'content': 0.08393064886331558, 'timestamp': '2025-09-30 22:16:13.381765', 'step': 3363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.413407', 'step': 3363, 'epoch': 1} {'type': 'loss', 'content': 0.16741478443145752, 'timestamp': '2025-09-30 22:16:13.437690', 'step': 3364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.479325', 'step': 3364, 'epoch': 1} {'type': 'loss', 'content': 0.2869572341442108, 'timestamp': '2025-09-30 22:16:13.482292', 'step': 3365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:13.512851', 'step': 3365, 'epoch': 1} {'type': 'loss', 'content': 0.11142222583293915, 'timestamp': '2025-09-30 22:16:13.519048', 'step': 3366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:13.550704', 'step': 3366, 'epoch': 1} {'type': 'loss', 'content': 0.18915413320064545, 'timestamp': '2025-09-30 22:16:13.554023', 'step': 3367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:13.585200', 'step': 3367, 'epoch': 1} {'type': 'loss', 'content': 0.16225562989711761, 'timestamp': '2025-09-30 22:16:13.610702', 'step': 3368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.642190', 'step': 3368, 'epoch': 1} {'type': 'loss', 'content': 0.2333780974149704, 'timestamp': '2025-09-30 22:16:13.648503', 'step': 3369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:13.682625', 'step': 3369, 'epoch': 1} {'type': 'loss', 'content': 0.1298743337392807, 'timestamp': '2025-09-30 22:16:13.685230', 'step': 3370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:13.724965', 'step': 3370, 'epoch': 1} {'type': 'loss', 'content': 0.2130083292722702, 'timestamp': '2025-09-30 22:16:13.727620', 'step': 3371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:13.760821', 'step': 3371, 'epoch': 1} {'type': 'loss', 'content': 0.14741121232509613, 'timestamp': '2025-09-30 22:16:13.785483', 'step': 3372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:13.816087', 'step': 3372, 'epoch': 1} {'type': 'loss', 'content': 0.13553737103939056, 'timestamp': '2025-09-30 22:16:13.819494', 'step': 3373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:13.851130', 'step': 3373, 'epoch': 1} {'type': 'loss', 'content': 0.15440598130226135, 'timestamp': '2025-09-30 22:16:13.854127', 'step': 3374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.884388', 'step': 3374, 'epoch': 1} {'type': 'loss', 'content': 0.21707285940647125, 'timestamp': '2025-09-30 22:16:13.888234', 'step': 3375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:13.920725', 'step': 3375, 'epoch': 1} {'type': 'loss', 'content': 0.2508852183818817, 'timestamp': '2025-09-30 22:16:13.944652', 'step': 3376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:13.975690', 'step': 3376, 'epoch': 1} {'type': 'loss', 'content': 0.24288122355937958, 'timestamp': '2025-09-30 22:16:13.978519', 'step': 3377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.010841', 'step': 3377, 'epoch': 1} {'type': 'loss', 'content': 0.1601741462945938, 'timestamp': '2025-09-30 22:16:14.013340', 'step': 3378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.050138', 'step': 3378, 'epoch': 1} {'type': 'loss', 'content': 0.13201245665550232, 'timestamp': '2025-09-30 22:16:14.053038', 'step': 3379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:14.084777', 'step': 3379, 'epoch': 1} {'type': 'loss', 'content': 0.16926686465740204, 'timestamp': '2025-09-30 22:16:14.110144', 'step': 3380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:14.141175', 'step': 3380, 'epoch': 1} {'type': 'loss', 'content': 0.20072753727436066, 'timestamp': '2025-09-30 22:16:14.144722', 'step': 3381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.176836', 'step': 3381, 'epoch': 1} {'type': 'loss', 'content': 0.22025486826896667, 'timestamp': '2025-09-30 22:16:14.183600', 'step': 3382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.219040', 'step': 3382, 'epoch': 1} {'type': 'loss', 'content': 0.17421047389507294, 'timestamp': '2025-09-30 22:16:14.222649', 'step': 3383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.257675', 'step': 3383, 'epoch': 1} {'type': 'loss', 'content': 0.14351831376552582, 'timestamp': '2025-09-30 22:16:14.283815', 'step': 3384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.314421', 'step': 3384, 'epoch': 1} {'type': 'loss', 'content': 0.1967344582080841, 'timestamp': '2025-09-30 22:16:14.317253', 'step': 3385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.350070', 'step': 3385, 'epoch': 1} {'type': 'loss', 'content': 0.17347542941570282, 'timestamp': '2025-09-30 22:16:14.352423', 'step': 3386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.399101', 'step': 3386, 'epoch': 1} {'type': 'loss', 'content': 0.12090450525283813, 'timestamp': '2025-09-30 22:16:14.402671', 'step': 3387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.452972', 'step': 3387, 'epoch': 1} {'type': 'loss', 'content': 0.09457533061504364, 'timestamp': '2025-09-30 22:16:14.478364', 'step': 3388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.509032', 'step': 3388, 'epoch': 1} {'type': 'loss', 'content': 0.15851141512393951, 'timestamp': '2025-09-30 22:16:14.511910', 'step': 3389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.543533', 'step': 3389, 'epoch': 1} {'type': 'loss', 'content': 0.22410809993743896, 'timestamp': '2025-09-30 22:16:14.546682', 'step': 3390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.586507', 'step': 3390, 'epoch': 1} {'type': 'loss', 'content': 0.17248331010341644, 'timestamp': '2025-09-30 22:16:14.591919', 'step': 3391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:14.636321', 'step': 3391, 'epoch': 1} {'type': 'loss', 'content': 0.1669655740261078, 'timestamp': '2025-09-30 22:16:14.663025', 'step': 3392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.696782', 'step': 3392, 'epoch': 1} {'type': 'loss', 'content': 0.11526855081319809, 'timestamp': '2025-09-30 22:16:14.699878', 'step': 3393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.744516', 'step': 3393, 'epoch': 1} {'type': 'loss', 'content': 0.1259620189666748, 'timestamp': '2025-09-30 22:16:14.747501', 'step': 3394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.778653', 'step': 3394, 'epoch': 1} {'type': 'loss', 'content': 0.10349167138338089, 'timestamp': '2025-09-30 22:16:14.782159', 'step': 3395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.813110', 'step': 3395, 'epoch': 1} {'type': 'loss', 'content': 0.20817312598228455, 'timestamp': '2025-09-30 22:16:14.838475', 'step': 3396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:14.875455', 'step': 3396, 'epoch': 1} {'type': 'loss', 'content': 0.1719369888305664, 'timestamp': '2025-09-30 22:16:14.883224', 'step': 3397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.921328', 'step': 3397, 'epoch': 1} {'type': 'loss', 'content': 0.1827280968427658, 'timestamp': '2025-09-30 22:16:14.928256', 'step': 3398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:14.963666', 'step': 3398, 'epoch': 1} {'type': 'loss', 'content': 0.20897313952445984, 'timestamp': '2025-09-30 22:16:14.970527', 'step': 3399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.003659', 'step': 3399, 'epoch': 1} {'type': 'loss', 'content': 0.13434094190597534, 'timestamp': '2025-09-30 22:16:15.028601', 'step': 3400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.059635', 'step': 3400, 'epoch': 1} {'type': 'loss', 'content': 0.16003024578094482, 'timestamp': '2025-09-30 22:16:15.063236', 'step': 3401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:15.097265', 'step': 3401, 'epoch': 1} {'type': 'loss', 'content': 0.13004805147647858, 'timestamp': '2025-09-30 22:16:15.101008', 'step': 3402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:15.131721', 'step': 3402, 'epoch': 1} {'type': 'loss', 'content': 0.28411880135536194, 'timestamp': '2025-09-30 22:16:15.136231', 'step': 3403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:15.167628', 'step': 3403, 'epoch': 1} {'type': 'loss', 'content': 0.187887504696846, 'timestamp': '2025-09-30 22:16:15.191458', 'step': 3404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.232730', 'step': 3404, 'epoch': 1} {'type': 'loss', 'content': 0.16467353701591492, 'timestamp': '2025-09-30 22:16:15.245352', 'step': 3405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.277021', 'step': 3405, 'epoch': 1} {'type': 'loss', 'content': 0.13720478117465973, 'timestamp': '2025-09-30 22:16:15.282436', 'step': 3406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:15.313781', 'step': 3406, 'epoch': 1} {'type': 'loss', 'content': 0.19653604924678802, 'timestamp': '2025-09-30 22:16:15.316510', 'step': 3407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:15.348238', 'step': 3407, 'epoch': 1} {'type': 'loss', 'content': 0.1968861222267151, 'timestamp': '2025-09-30 22:16:15.374314', 'step': 3408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.405729', 'step': 3408, 'epoch': 1} {'type': 'loss', 'content': 0.11718270182609558, 'timestamp': '2025-09-30 22:16:15.408225', 'step': 3409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:15.444648', 'step': 3409, 'epoch': 1} {'type': 'loss', 'content': 0.14136359095573425, 'timestamp': '2025-09-30 22:16:15.450634', 'step': 3410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:15.486628', 'step': 3410, 'epoch': 1} {'type': 'loss', 'content': 0.06212782859802246, 'timestamp': '2025-09-30 22:16:15.494140', 'step': 3411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:15.525191', 'step': 3411, 'epoch': 1} {'type': 'loss', 'content': 0.24980631470680237, 'timestamp': '2025-09-30 22:16:15.554449', 'step': 3412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.585562', 'step': 3412, 'epoch': 1} {'type': 'loss', 'content': 0.18165084719657898, 'timestamp': '2025-09-30 22:16:15.589405', 'step': 3413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.620772', 'step': 3413, 'epoch': 1} {'type': 'loss', 'content': 0.14527170360088348, 'timestamp': '2025-09-30 22:16:15.623625', 'step': 3414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.665354', 'step': 3414, 'epoch': 1} {'type': 'loss', 'content': 0.12448546290397644, 'timestamp': '2025-09-30 22:16:15.669774', 'step': 3415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.701877', 'step': 3415, 'epoch': 1} {'type': 'loss', 'content': 0.1818254441022873, 'timestamp': '2025-09-30 22:16:15.725987', 'step': 3416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.757691', 'step': 3416, 'epoch': 1} {'type': 'loss', 'content': 0.11915842443704605, 'timestamp': '2025-09-30 22:16:15.761358', 'step': 3417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.793465', 'step': 3417, 'epoch': 1} {'type': 'loss', 'content': 0.1369699388742447, 'timestamp': '2025-09-30 22:16:15.797494', 'step': 3418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.828751', 'step': 3418, 'epoch': 1} {'type': 'loss', 'content': 0.13449203968048096, 'timestamp': '2025-09-30 22:16:15.831664', 'step': 3419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:15.862837', 'step': 3419, 'epoch': 1} {'type': 'loss', 'content': 0.10207705944776535, 'timestamp': '2025-09-30 22:16:15.886958', 'step': 3420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.918023', 'step': 3420, 'epoch': 1} {'type': 'loss', 'content': 0.21169990301132202, 'timestamp': '2025-09-30 22:16:15.930287', 'step': 3421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:15.960711', 'step': 3421, 'epoch': 1} {'type': 'loss', 'content': 0.13352464139461517, 'timestamp': '2025-09-30 22:16:15.965020', 'step': 3422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:15.997389', 'step': 3422, 'epoch': 1} {'type': 'loss', 'content': 0.15644624829292297, 'timestamp': '2025-09-30 22:16:15.999494', 'step': 3423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:16.030047', 'step': 3423, 'epoch': 1} {'type': 'loss', 'content': 0.10261551290750504, 'timestamp': '2025-09-30 22:16:16.054431', 'step': 3424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.087080', 'step': 3424, 'epoch': 1} {'type': 'loss', 'content': 0.10902929306030273, 'timestamp': '2025-09-30 22:16:16.089854', 'step': 3425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.122317', 'step': 3425, 'epoch': 1} {'type': 'loss', 'content': 0.1549191027879715, 'timestamp': '2025-09-30 22:16:16.124615', 'step': 3426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:16.155358', 'step': 3426, 'epoch': 1} {'type': 'loss', 'content': 0.25899967551231384, 'timestamp': '2025-09-30 22:16:16.158250', 'step': 3427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:16.191780', 'step': 3427, 'epoch': 1} {'type': 'loss', 'content': 0.27516815066337585, 'timestamp': '2025-09-30 22:16:16.215565', 'step': 3428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:16.247389', 'step': 3428, 'epoch': 1} {'type': 'loss', 'content': 0.17279966175556183, 'timestamp': '2025-09-30 22:16:16.249643', 'step': 3429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:16.281125', 'step': 3429, 'epoch': 1} {'type': 'loss', 'content': 0.2414890080690384, 'timestamp': '2025-09-30 22:16:16.285188', 'step': 3430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.317431', 'step': 3430, 'epoch': 1} {'type': 'loss', 'content': 0.14860697090625763, 'timestamp': '2025-09-30 22:16:16.319941', 'step': 3431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.350609', 'step': 3431, 'epoch': 1} {'type': 'loss', 'content': 0.14864559471607208, 'timestamp': '2025-09-30 22:16:16.377765', 'step': 3432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.410062', 'step': 3432, 'epoch': 1} {'type': 'loss', 'content': 0.08554006367921829, 'timestamp': '2025-09-30 22:16:16.413581', 'step': 3433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.447605', 'step': 3433, 'epoch': 1} {'type': 'loss', 'content': 0.18474741280078888, 'timestamp': '2025-09-30 22:16:16.450020', 'step': 3434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.480726', 'step': 3434, 'epoch': 1} {'type': 'loss', 'content': 0.1726352423429489, 'timestamp': '2025-09-30 22:16:16.482941', 'step': 3435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.513707', 'step': 3435, 'epoch': 1} {'type': 'loss', 'content': 0.2794223129749298, 'timestamp': '2025-09-30 22:16:16.537341', 'step': 3436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:16.569225', 'step': 3436, 'epoch': 1} {'type': 'loss', 'content': 0.11244823038578033, 'timestamp': '2025-09-30 22:16:16.572034', 'step': 3437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.603720', 'step': 3437, 'epoch': 1} {'type': 'loss', 'content': 0.25456711649894714, 'timestamp': '2025-09-30 22:16:16.605891', 'step': 3438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:16.639834', 'step': 3438, 'epoch': 1} {'type': 'loss', 'content': 0.11655501276254654, 'timestamp': '2025-09-30 22:16:16.646241', 'step': 3439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:16.677923', 'step': 3439, 'epoch': 1} {'type': 'loss', 'content': 0.16107812523841858, 'timestamp': '2025-09-30 22:16:16.702377', 'step': 3440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:16.732323', 'step': 3440, 'epoch': 1} {'type': 'loss', 'content': 0.13203513622283936, 'timestamp': '2025-09-30 22:16:16.734760', 'step': 3441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:16.766716', 'step': 3441, 'epoch': 1} {'type': 'loss', 'content': 0.17514240741729736, 'timestamp': '2025-09-30 22:16:16.770525', 'step': 3442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.800783', 'step': 3442, 'epoch': 1} {'type': 'loss', 'content': 0.16389310359954834, 'timestamp': '2025-09-30 22:16:16.803305', 'step': 3443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:16.834234', 'step': 3443, 'epoch': 1} {'type': 'loss', 'content': 0.10948336124420166, 'timestamp': '2025-09-30 22:16:16.857797', 'step': 3444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:16.887755', 'step': 3444, 'epoch': 1} {'type': 'loss', 'content': 0.1804865300655365, 'timestamp': '2025-09-30 22:16:16.892125', 'step': 3445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:16.924504', 'step': 3445, 'epoch': 1} {'type': 'loss', 'content': 0.15149299800395966, 'timestamp': '2025-09-30 22:16:16.928440', 'step': 3446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.959468', 'step': 3446, 'epoch': 1} {'type': 'loss', 'content': 0.1757240742444992, 'timestamp': '2025-09-30 22:16:16.962120', 'step': 3447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:16.992134', 'step': 3447, 'epoch': 1} {'type': 'loss', 'content': 0.201862633228302, 'timestamp': '2025-09-30 22:16:17.016852', 'step': 3448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:17.048540', 'step': 3448, 'epoch': 1} {'type': 'loss', 'content': 0.1355561465024948, 'timestamp': '2025-09-30 22:16:17.052521', 'step': 3449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:17.085564', 'step': 3449, 'epoch': 1} {'type': 'loss', 'content': 0.1760280579328537, 'timestamp': '2025-09-30 22:16:17.088395', 'step': 3450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:17.118833', 'step': 3450, 'epoch': 1} {'type': 'loss', 'content': 0.2081473022699356, 'timestamp': '2025-09-30 22:16:17.124604', 'step': 3451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:17.159162', 'step': 3451, 'epoch': 1} {'type': 'loss', 'content': 0.19573421776294708, 'timestamp': '2025-09-30 22:16:17.186708', 'step': 3452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:17.217342', 'step': 3452, 'epoch': 1} {'type': 'loss', 'content': 0.2122635692358017, 'timestamp': '2025-09-30 22:16:17.220148', 'step': 3453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:17.254562', 'step': 3453, 'epoch': 1} {'type': 'loss', 'content': 0.18743754923343658, 'timestamp': '2025-09-30 22:16:17.257435', 'step': 3454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.291751', 'step': 3454, 'epoch': 1} {'type': 'loss', 'content': 0.13825304806232452, 'timestamp': '2025-09-30 22:16:17.294963', 'step': 3455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:17.326481', 'step': 3455, 'epoch': 1} {'type': 'loss', 'content': 0.19708402454853058, 'timestamp': '2025-09-30 22:16:17.350243', 'step': 3456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.382448', 'step': 3456, 'epoch': 1} {'type': 'loss', 'content': 0.07193957269191742, 'timestamp': '2025-09-30 22:16:17.390350', 'step': 3457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.422197', 'step': 3457, 'epoch': 1} {'type': 'loss', 'content': 0.12194906175136566, 'timestamp': '2025-09-30 22:16:17.425348', 'step': 3458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.466543', 'step': 3458, 'epoch': 1} {'type': 'loss', 'content': 0.20077098906040192, 'timestamp': '2025-09-30 22:16:17.469406', 'step': 3459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:17.507005', 'step': 3459, 'epoch': 1} {'type': 'loss', 'content': 0.12949569523334503, 'timestamp': '2025-09-30 22:16:17.541633', 'step': 3460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:17.575734', 'step': 3460, 'epoch': 1} {'type': 'loss', 'content': 0.15680013597011566, 'timestamp': '2025-09-30 22:16:17.578228', 'step': 3461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:17.608875', 'step': 3461, 'epoch': 1} {'type': 'loss', 'content': 0.10254079848527908, 'timestamp': '2025-09-30 22:16:17.611360', 'step': 3462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.649875', 'step': 3462, 'epoch': 1} {'type': 'loss', 'content': 0.14743444323539734, 'timestamp': '2025-09-30 22:16:17.652690', 'step': 3463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.686171', 'step': 3463, 'epoch': 1} {'type': 'loss', 'content': 0.13909558951854706, 'timestamp': '2025-09-30 22:16:17.710647', 'step': 3464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.749624', 'step': 3464, 'epoch': 1} {'type': 'loss', 'content': 0.17087289690971375, 'timestamp': '2025-09-30 22:16:17.752867', 'step': 3465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.787623', 'step': 3465, 'epoch': 1} {'type': 'loss', 'content': 0.12183471024036407, 'timestamp': '2025-09-30 22:16:17.790393', 'step': 3466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.828404', 'step': 3466, 'epoch': 1} {'type': 'loss', 'content': 0.19605274498462677, 'timestamp': '2025-09-30 22:16:17.834676', 'step': 3467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:17.871174', 'step': 3467, 'epoch': 1} {'type': 'loss', 'content': 0.15795063972473145, 'timestamp': '2025-09-30 22:16:17.898212', 'step': 3468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:17.934768', 'step': 3468, 'epoch': 1} {'type': 'loss', 'content': 0.1761719435453415, 'timestamp': '2025-09-30 22:16:17.937723', 'step': 3469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:17.975356', 'step': 3469, 'epoch': 1} {'type': 'loss', 'content': 0.17198656499385834, 'timestamp': '2025-09-30 22:16:17.981763', 'step': 3470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:18.016691', 'step': 3470, 'epoch': 1} {'type': 'loss', 'content': 0.232539564371109, 'timestamp': '2025-09-30 22:16:18.023680', 'step': 3471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.060793', 'step': 3471, 'epoch': 1} {'type': 'loss', 'content': 0.1471019983291626, 'timestamp': '2025-09-30 22:16:18.085006', 'step': 3472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:18.119412', 'step': 3472, 'epoch': 1} {'type': 'loss', 'content': 0.14345407485961914, 'timestamp': '2025-09-30 22:16:18.125072', 'step': 3473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:18.161461', 'step': 3473, 'epoch': 1} {'type': 'loss', 'content': 0.18753916025161743, 'timestamp': '2025-09-30 22:16:18.167395', 'step': 3474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:18.204182', 'step': 3474, 'epoch': 1} {'type': 'loss', 'content': 0.16064278781414032, 'timestamp': '2025-09-30 22:16:18.210430', 'step': 3475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.246006', 'step': 3475, 'epoch': 1} {'type': 'loss', 'content': 0.2696923017501831, 'timestamp': '2025-09-30 22:16:18.274889', 'step': 3476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:18.309345', 'step': 3476, 'epoch': 1} {'type': 'loss', 'content': 0.15110501646995544, 'timestamp': '2025-09-30 22:16:18.316104', 'step': 3477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:18.348017', 'step': 3477, 'epoch': 1} {'type': 'loss', 'content': 0.15182781219482422, 'timestamp': '2025-09-30 22:16:18.354160', 'step': 3478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.390388', 'step': 3478, 'epoch': 1} {'type': 'loss', 'content': 0.16485856473445892, 'timestamp': '2025-09-30 22:16:18.393131', 'step': 3479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.435266', 'step': 3479, 'epoch': 1} {'type': 'loss', 'content': 0.19654959440231323, 'timestamp': '2025-09-30 22:16:18.462871', 'step': 3480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:18.498955', 'step': 3480, 'epoch': 1} {'type': 'loss', 'content': 0.1568128615617752, 'timestamp': '2025-09-30 22:16:18.502239', 'step': 3481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:18.539460', 'step': 3481, 'epoch': 1} {'type': 'loss', 'content': 0.2387654185295105, 'timestamp': '2025-09-30 22:16:18.542053', 'step': 3482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:18.573659', 'step': 3482, 'epoch': 1} {'type': 'loss', 'content': 0.1949867606163025, 'timestamp': '2025-09-30 22:16:18.576363', 'step': 3483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.612984', 'step': 3483, 'epoch': 1} {'type': 'loss', 'content': 0.15462873876094818, 'timestamp': '2025-09-30 22:16:18.640950', 'step': 3484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:18.676739', 'step': 3484, 'epoch': 1} {'type': 'loss', 'content': 0.1407565027475357, 'timestamp': '2025-09-30 22:16:18.679667', 'step': 3485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:18.717064', 'step': 3485, 'epoch': 1} {'type': 'loss', 'content': 0.1154937669634819, 'timestamp': '2025-09-30 22:16:18.723742', 'step': 3486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.760187', 'step': 3486, 'epoch': 1} {'type': 'loss', 'content': 0.17630328238010406, 'timestamp': '2025-09-30 22:16:18.762783', 'step': 3487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:18.794314', 'step': 3487, 'epoch': 1} {'type': 'loss', 'content': 0.146789088845253, 'timestamp': '2025-09-30 22:16:18.823338', 'step': 3488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:18.873329', 'step': 3488, 'epoch': 1} {'type': 'loss', 'content': 0.19005878269672394, 'timestamp': '2025-09-30 22:16:18.876306', 'step': 3489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:18.907282', 'step': 3489, 'epoch': 1} {'type': 'loss', 'content': 0.1428634226322174, 'timestamp': '2025-09-30 22:16:18.909951', 'step': 3490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:18.940161', 'step': 3490, 'epoch': 1} {'type': 'loss', 'content': 0.19272291660308838, 'timestamp': '2025-09-30 22:16:18.943243', 'step': 3491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:18.974124', 'step': 3491, 'epoch': 1} {'type': 'loss', 'content': 0.15151309967041016, 'timestamp': '2025-09-30 22:16:18.998281', 'step': 3492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:19.030066', 'step': 3492, 'epoch': 1} {'type': 'loss', 'content': 0.09670347720384598, 'timestamp': '2025-09-30 22:16:19.033119', 'step': 3493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:19.069066', 'step': 3493, 'epoch': 1} {'type': 'loss', 'content': 0.18919254839420319, 'timestamp': '2025-09-30 22:16:19.073541', 'step': 3494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:19.105338', 'step': 3494, 'epoch': 1} {'type': 'loss', 'content': 0.1401205211877823, 'timestamp': '2025-09-30 22:16:19.108631', 'step': 3495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:19.139813', 'step': 3495, 'epoch': 1} {'type': 'loss', 'content': 0.347096711397171, 'timestamp': '2025-09-30 22:16:19.164255', 'step': 3496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:16:19.207413', 'step': 3496, 'epoch': 1} {'type': 'loss', 'content': 0.10416138172149658, 'timestamp': '2025-09-30 22:16:19.210180', 'step': 3497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:19.244467', 'step': 3497, 'epoch': 1} {'type': 'loss', 'content': 0.182961106300354, 'timestamp': '2025-09-30 22:16:19.247524', 'step': 3498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:19.283325', 'step': 3498, 'epoch': 1} {'type': 'loss', 'content': 0.1293771117925644, 'timestamp': '2025-09-30 22:16:19.286169', 'step': 3499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:19.317177', 'step': 3499, 'epoch': 1} {'type': 'loss', 'content': 0.10236164927482605, 'timestamp': '2025-09-30 22:16:19.344688', 'step': 3500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 3500', 'timestamp': '2025-09-30 22:16:23.724208', 'step': 3500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:23.768065', 'step': 3500, 'epoch': 1} {'type': 'loss', 'content': 0.19521433115005493, 'timestamp': '2025-09-30 22:16:23.772573', 'step': 3501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:23.804418', 'step': 3501, 'epoch': 1} {'type': 'loss', 'content': 0.11483389884233475, 'timestamp': '2025-09-30 22:16:23.809207', 'step': 3502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:23.840521', 'step': 3502, 'epoch': 1} {'type': 'loss', 'content': 0.11725283414125443, 'timestamp': '2025-09-30 22:16:23.844160', 'step': 3503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:23.887807', 'step': 3503, 'epoch': 1} {'type': 'loss', 'content': 0.1905261129140854, 'timestamp': '2025-09-30 22:16:23.926102', 'step': 3504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:23.957472', 'step': 3504, 'epoch': 1} {'type': 'loss', 'content': 0.13908563554286957, 'timestamp': '2025-09-30 22:16:23.960789', 'step': 3505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:23.993074', 'step': 3505, 'epoch': 1} {'type': 'loss', 'content': 0.12678438425064087, 'timestamp': '2025-09-30 22:16:24.002782', 'step': 3506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.040074', 'step': 3506, 'epoch': 1} {'type': 'loss', 'content': 0.191234290599823, 'timestamp': '2025-09-30 22:16:24.043927', 'step': 3507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:24.074443', 'step': 3507, 'epoch': 1} {'type': 'loss', 'content': 0.12710881233215332, 'timestamp': '2025-09-30 22:16:24.104416', 'step': 3508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:24.134838', 'step': 3508, 'epoch': 1} {'type': 'loss', 'content': 0.1372249275445938, 'timestamp': '2025-09-30 22:16:24.146413', 'step': 3509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.183833', 'step': 3509, 'epoch': 1} {'type': 'loss', 'content': 0.195962056517601, 'timestamp': '2025-09-30 22:16:24.186947', 'step': 3510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.219609', 'step': 3510, 'epoch': 1} {'type': 'loss', 'content': 0.23632287979125977, 'timestamp': '2025-09-30 22:16:24.230236', 'step': 3511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:24.262999', 'step': 3511, 'epoch': 1} {'type': 'loss', 'content': 0.16372473537921906, 'timestamp': '2025-09-30 22:16:24.291281', 'step': 3512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:24.322429', 'step': 3512, 'epoch': 1} {'type': 'loss', 'content': 0.14326030015945435, 'timestamp': '2025-09-30 22:16:24.331792', 'step': 3513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:24.363986', 'step': 3513, 'epoch': 1} {'type': 'loss', 'content': 0.17822881042957306, 'timestamp': '2025-09-30 22:16:24.368249', 'step': 3514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.399994', 'step': 3514, 'epoch': 1} {'type': 'loss', 'content': 0.2812262177467346, 'timestamp': '2025-09-30 22:16:24.403317', 'step': 3515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.434774', 'step': 3515, 'epoch': 1} {'type': 'loss', 'content': 0.128549724817276, 'timestamp': '2025-09-30 22:16:24.459827', 'step': 3516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.491103', 'step': 3516, 'epoch': 1} {'type': 'loss', 'content': 0.15763862431049347, 'timestamp': '2025-09-30 22:16:24.494064', 'step': 3517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:24.525944', 'step': 3517, 'epoch': 1} {'type': 'loss', 'content': 0.14519503712654114, 'timestamp': '2025-09-30 22:16:24.529558', 'step': 3518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:24.561082', 'step': 3518, 'epoch': 1} {'type': 'loss', 'content': 0.21624983847141266, 'timestamp': '2025-09-30 22:16:24.568303', 'step': 3519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:24.605626', 'step': 3519, 'epoch': 1} {'type': 'loss', 'content': 0.1988615244626999, 'timestamp': '2025-09-30 22:16:24.630304', 'step': 3520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:24.662128', 'step': 3520, 'epoch': 1} {'type': 'loss', 'content': 0.1815590113401413, 'timestamp': '2025-09-30 22:16:24.664954', 'step': 3521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.696683', 'step': 3521, 'epoch': 1} {'type': 'loss', 'content': 0.1113775447010994, 'timestamp': '2025-09-30 22:16:24.700206', 'step': 3522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:24.731965', 'step': 3522, 'epoch': 1} {'type': 'loss', 'content': 0.2043483704328537, 'timestamp': '2025-09-30 22:16:24.735520', 'step': 3523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:24.767030', 'step': 3523, 'epoch': 1} {'type': 'loss', 'content': 0.12258382141590118, 'timestamp': '2025-09-30 22:16:24.791332', 'step': 3524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:16:24.831939', 'step': 3524, 'epoch': 1} {'type': 'loss', 'content': 0.12690691649913788, 'timestamp': '2025-09-30 22:16:24.835152', 'step': 3525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:24.872939', 'step': 3525, 'epoch': 1} {'type': 'loss', 'content': 0.14502227306365967, 'timestamp': '2025-09-30 22:16:24.876614', 'step': 3526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:24.915109', 'step': 3526, 'epoch': 1} {'type': 'loss', 'content': 0.2708823084831238, 'timestamp': '2025-09-30 22:16:24.925034', 'step': 3527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:24.956164', 'step': 3527, 'epoch': 1} {'type': 'loss', 'content': 0.21726267039775848, 'timestamp': '2025-09-30 22:16:24.986812', 'step': 3528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:25.030916', 'step': 3528, 'epoch': 1} {'type': 'loss', 'content': 0.14132535457611084, 'timestamp': '2025-09-30 22:16:25.037228', 'step': 3529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:25.083537', 'step': 3529, 'epoch': 1} {'type': 'loss', 'content': 0.21006609499454498, 'timestamp': '2025-09-30 22:16:25.092468', 'step': 3530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:25.142297', 'step': 3530, 'epoch': 1} {'type': 'loss', 'content': 0.22984236478805542, 'timestamp': '2025-09-30 22:16:25.144606', 'step': 3531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:25.206535', 'step': 3531, 'epoch': 1} {'type': 'loss', 'content': 0.13342368602752686, 'timestamp': '2025-09-30 22:16:25.231571', 'step': 3532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:25.294768', 'step': 3532, 'epoch': 1} {'type': 'loss', 'content': 0.21681904792785645, 'timestamp': '2025-09-30 22:16:25.303055', 'step': 3533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:25.363023', 'step': 3533, 'epoch': 1} {'type': 'loss', 'content': 0.19375668466091156, 'timestamp': '2025-09-30 22:16:25.366417', 'step': 3534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:25.421130', 'step': 3534, 'epoch': 1} {'type': 'loss', 'content': 0.1240404024720192, 'timestamp': '2025-09-30 22:16:25.423508', 'step': 3535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:25.476914', 'step': 3535, 'epoch': 1} {'type': 'loss', 'content': 0.1867688149213791, 'timestamp': '2025-09-30 22:16:25.506815', 'step': 3536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:25.569447', 'step': 3536, 'epoch': 1} {'type': 'loss', 'content': 0.16911138594150543, 'timestamp': '2025-09-30 22:16:25.572432', 'step': 3537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:25.618166', 'step': 3537, 'epoch': 1} {'type': 'loss', 'content': 0.08189212530851364, 'timestamp': '2025-09-30 22:16:25.627030', 'step': 3538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:25.701417', 'step': 3538, 'epoch': 1} {'type': 'loss', 'content': 0.23786330223083496, 'timestamp': '2025-09-30 22:16:25.704101', 'step': 3539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:25.777891', 'step': 3539, 'epoch': 1} {'type': 'loss', 'content': 0.23668541014194489, 'timestamp': '2025-09-30 22:16:25.803088', 'step': 3540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:25.850182', 'step': 3540, 'epoch': 1} {'type': 'loss', 'content': 0.24651405215263367, 'timestamp': '2025-09-30 22:16:25.856544', 'step': 3541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:25.913588', 'step': 3541, 'epoch': 1} {'type': 'loss', 'content': 0.23651303350925446, 'timestamp': '2025-09-30 22:16:25.917828', 'step': 3542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:25.986032', 'step': 3542, 'epoch': 1} {'type': 'loss', 'content': 0.17722217738628387, 'timestamp': '2025-09-30 22:16:25.990704', 'step': 3543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.047928', 'step': 3543, 'epoch': 1} {'type': 'loss', 'content': 0.1286451369524002, 'timestamp': '2025-09-30 22:16:26.072152', 'step': 3544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.122344', 'step': 3544, 'epoch': 1} {'type': 'loss', 'content': 0.19889995455741882, 'timestamp': '2025-09-30 22:16:26.126984', 'step': 3545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.194624', 'step': 3545, 'epoch': 1} {'type': 'loss', 'content': 0.2458517849445343, 'timestamp': '2025-09-30 22:16:26.197113', 'step': 3546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:26.255743', 'step': 3546, 'epoch': 1} {'type': 'loss', 'content': 0.16833515465259552, 'timestamp': '2025-09-30 22:16:26.259871', 'step': 3547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:26.302259', 'step': 3547, 'epoch': 1} {'type': 'loss', 'content': 0.09272121638059616, 'timestamp': '2025-09-30 22:16:26.329983', 'step': 3548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:26.362486', 'step': 3548, 'epoch': 1} {'type': 'loss', 'content': 0.1364498883485794, 'timestamp': '2025-09-30 22:16:26.366770', 'step': 3549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.401055', 'step': 3549, 'epoch': 1} {'type': 'loss', 'content': 0.21071597933769226, 'timestamp': '2025-09-30 22:16:26.408003', 'step': 3550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:26.443732', 'step': 3550, 'epoch': 1} {'type': 'loss', 'content': 0.11179596930742264, 'timestamp': '2025-09-30 22:16:26.448060', 'step': 3551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:26.479897', 'step': 3551, 'epoch': 1} {'type': 'loss', 'content': 0.1078045442700386, 'timestamp': '2025-09-30 22:16:26.504498', 'step': 3552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.535193', 'step': 3552, 'epoch': 1} {'type': 'loss', 'content': 0.10589458048343658, 'timestamp': '2025-09-30 22:16:26.537886', 'step': 3553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:26.571599', 'step': 3553, 'epoch': 1} {'type': 'loss', 'content': 0.1095471903681755, 'timestamp': '2025-09-30 22:16:26.575985', 'step': 3554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:26.610508', 'step': 3554, 'epoch': 1} {'type': 'loss', 'content': 0.16411814093589783, 'timestamp': '2025-09-30 22:16:26.615799', 'step': 3555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:26.647018', 'step': 3555, 'epoch': 1} {'type': 'loss', 'content': 0.19239239394664764, 'timestamp': '2025-09-30 22:16:26.673170', 'step': 3556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:26.705608', 'step': 3556, 'epoch': 1} {'type': 'loss', 'content': 0.1375643014907837, 'timestamp': '2025-09-30 22:16:26.711019', 'step': 3557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.744506', 'step': 3557, 'epoch': 1} {'type': 'loss', 'content': 0.1776016652584076, 'timestamp': '2025-09-30 22:16:26.747759', 'step': 3558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.778592', 'step': 3558, 'epoch': 1} {'type': 'loss', 'content': 0.23404085636138916, 'timestamp': '2025-09-30 22:16:26.784812', 'step': 3559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.817990', 'step': 3559, 'epoch': 1} {'type': 'loss', 'content': 0.07973335683345795, 'timestamp': '2025-09-30 22:16:26.841592', 'step': 3560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:26.874219', 'step': 3560, 'epoch': 1} {'type': 'loss', 'content': 0.22163358330726624, 'timestamp': '2025-09-30 22:16:26.876767', 'step': 3561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:26.910577', 'step': 3561, 'epoch': 1} {'type': 'loss', 'content': 0.20088081061840057, 'timestamp': '2025-09-30 22:16:26.912963', 'step': 3562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:26.943689', 'step': 3562, 'epoch': 1} {'type': 'loss', 'content': 0.14101055264472961, 'timestamp': '2025-09-30 22:16:26.951270', 'step': 3563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:26.982402', 'step': 3563, 'epoch': 1} {'type': 'loss', 'content': 0.16362755000591278, 'timestamp': '2025-09-30 22:16:27.006630', 'step': 3564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.037105', 'step': 3564, 'epoch': 1} {'type': 'loss', 'content': 0.14345954358577728, 'timestamp': '2025-09-30 22:16:27.041343', 'step': 3565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:27.074050', 'step': 3565, 'epoch': 1} {'type': 'loss', 'content': 0.24826455116271973, 'timestamp': '2025-09-30 22:16:27.076473', 'step': 3566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.108597', 'step': 3566, 'epoch': 1} {'type': 'loss', 'content': 0.07257761061191559, 'timestamp': '2025-09-30 22:16:27.112792', 'step': 3567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.145021', 'step': 3567, 'epoch': 1} {'type': 'loss', 'content': 0.12089566886425018, 'timestamp': '2025-09-30 22:16:27.170023', 'step': 3568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:27.201105', 'step': 3568, 'epoch': 1} {'type': 'loss', 'content': 0.2063853144645691, 'timestamp': '2025-09-30 22:16:27.204046', 'step': 3569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.236572', 'step': 3569, 'epoch': 1} {'type': 'loss', 'content': 0.22080418467521667, 'timestamp': '2025-09-30 22:16:27.240962', 'step': 3570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:27.274034', 'step': 3570, 'epoch': 1} {'type': 'loss', 'content': 0.17426477372646332, 'timestamp': '2025-09-30 22:16:27.278426', 'step': 3571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:27.308634', 'step': 3571, 'epoch': 1} {'type': 'loss', 'content': 0.15869657695293427, 'timestamp': '2025-09-30 22:16:27.334788', 'step': 3572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:27.367427', 'step': 3572, 'epoch': 1} {'type': 'loss', 'content': 0.17009243369102478, 'timestamp': '2025-09-30 22:16:27.370511', 'step': 3573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:27.401267', 'step': 3573, 'epoch': 1} {'type': 'loss', 'content': 0.22458623349666595, 'timestamp': '2025-09-30 22:16:27.403748', 'step': 3574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:27.434022', 'step': 3574, 'epoch': 1} {'type': 'loss', 'content': 0.17082378268241882, 'timestamp': '2025-09-30 22:16:27.437799', 'step': 3575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.468326', 'step': 3575, 'epoch': 1} {'type': 'loss', 'content': 0.24186453223228455, 'timestamp': '2025-09-30 22:16:27.495461', 'step': 3576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:27.532800', 'step': 3576, 'epoch': 1} {'type': 'loss', 'content': 0.24925673007965088, 'timestamp': '2025-09-30 22:16:27.535266', 'step': 3577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:27.567076', 'step': 3577, 'epoch': 1} {'type': 'loss', 'content': 0.19952118396759033, 'timestamp': '2025-09-30 22:16:27.571059', 'step': 3578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:27.612779', 'step': 3578, 'epoch': 1} {'type': 'loss', 'content': 0.08260788023471832, 'timestamp': '2025-09-30 22:16:27.615223', 'step': 3579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.648556', 'step': 3579, 'epoch': 1} {'type': 'loss', 'content': 0.08103100955486298, 'timestamp': '2025-09-30 22:16:27.675374', 'step': 3580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:27.706978', 'step': 3580, 'epoch': 1} {'type': 'loss', 'content': 0.10466928780078888, 'timestamp': '2025-09-30 22:16:27.709773', 'step': 3581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:27.741694', 'step': 3581, 'epoch': 1} {'type': 'loss', 'content': 0.13391192257404327, 'timestamp': '2025-09-30 22:16:27.745897', 'step': 3582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.776559', 'step': 3582, 'epoch': 1} {'type': 'loss', 'content': 0.16383154690265656, 'timestamp': '2025-09-30 22:16:27.781390', 'step': 3583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:27.813654', 'step': 3583, 'epoch': 1} {'type': 'loss', 'content': 0.1527029275894165, 'timestamp': '2025-09-30 22:16:27.837745', 'step': 3584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.867816', 'step': 3584, 'epoch': 1} {'type': 'loss', 'content': 0.20826362073421478, 'timestamp': '2025-09-30 22:16:27.873530', 'step': 3585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:27.904158', 'step': 3585, 'epoch': 1} {'type': 'loss', 'content': 0.2829727530479431, 'timestamp': '2025-09-30 22:16:27.906865', 'step': 3586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.937495', 'step': 3586, 'epoch': 1} {'type': 'loss', 'content': 0.12891285121440887, 'timestamp': '2025-09-30 22:16:27.943334', 'step': 3587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:27.981041', 'step': 3587, 'epoch': 1} {'type': 'loss', 'content': 0.12516620755195618, 'timestamp': '2025-09-30 22:16:28.008476', 'step': 3588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.042886', 'step': 3588, 'epoch': 1} {'type': 'loss', 'content': 0.14711029827594757, 'timestamp': '2025-09-30 22:16:28.045473', 'step': 3589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.075960', 'step': 3589, 'epoch': 1} {'type': 'loss', 'content': 0.21213527023792267, 'timestamp': '2025-09-30 22:16:28.081866', 'step': 3590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.114665', 'step': 3590, 'epoch': 1} {'type': 'loss', 'content': 0.10492868721485138, 'timestamp': '2025-09-30 22:16:28.116934', 'step': 3591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:28.148348', 'step': 3591, 'epoch': 1} {'type': 'loss', 'content': 0.12180234491825104, 'timestamp': '2025-09-30 22:16:28.175294', 'step': 3592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:28.210554', 'step': 3592, 'epoch': 1} {'type': 'loss', 'content': 0.15242278575897217, 'timestamp': '2025-09-30 22:16:28.216394', 'step': 3593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.261290', 'step': 3593, 'epoch': 1} {'type': 'loss', 'content': 0.18794092535972595, 'timestamp': '2025-09-30 22:16:28.267649', 'step': 3594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:28.299245', 'step': 3594, 'epoch': 1} {'type': 'loss', 'content': 0.14251920580863953, 'timestamp': '2025-09-30 22:16:28.301901', 'step': 3595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:28.332561', 'step': 3595, 'epoch': 1} {'type': 'loss', 'content': 0.09871752560138702, 'timestamp': '2025-09-30 22:16:28.359860', 'step': 3596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:28.392059', 'step': 3596, 'epoch': 1} {'type': 'loss', 'content': 0.2105448693037033, 'timestamp': '2025-09-30 22:16:28.395054', 'step': 3597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:28.427275', 'step': 3597, 'epoch': 1} {'type': 'loss', 'content': 0.11059535294771194, 'timestamp': '2025-09-30 22:16:28.433349', 'step': 3598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.465004', 'step': 3598, 'epoch': 1} {'type': 'loss', 'content': 0.13482694327831268, 'timestamp': '2025-09-30 22:16:28.468015', 'step': 3599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:28.499891', 'step': 3599, 'epoch': 1} {'type': 'loss', 'content': 0.21903112530708313, 'timestamp': '2025-09-30 22:16:28.524215', 'step': 3600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.560336', 'step': 3600, 'epoch': 1} {'type': 'loss', 'content': 0.18967434763908386, 'timestamp': '2025-09-30 22:16:28.562822', 'step': 3601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.594285', 'step': 3601, 'epoch': 1} {'type': 'loss', 'content': 0.12209390103816986, 'timestamp': '2025-09-30 22:16:28.601378', 'step': 3602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:28.632796', 'step': 3602, 'epoch': 1} {'type': 'loss', 'content': 0.10711424052715302, 'timestamp': '2025-09-30 22:16:28.635410', 'step': 3603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:28.670013', 'step': 3603, 'epoch': 1} {'type': 'loss', 'content': 0.17192940413951874, 'timestamp': '2025-09-30 22:16:28.696518', 'step': 3604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.730333', 'step': 3604, 'epoch': 1} {'type': 'loss', 'content': 0.1016259416937828, 'timestamp': '2025-09-30 22:16:28.733502', 'step': 3605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:28.768146', 'step': 3605, 'epoch': 1} {'type': 'loss', 'content': 0.15233385562896729, 'timestamp': '2025-09-30 22:16:28.773587', 'step': 3606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.807733', 'step': 3606, 'epoch': 1} {'type': 'loss', 'content': 0.27181535959243774, 'timestamp': '2025-09-30 22:16:28.814669', 'step': 3607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.850286', 'step': 3607, 'epoch': 1} {'type': 'loss', 'content': 0.243491068482399, 'timestamp': '2025-09-30 22:16:28.876777', 'step': 3608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.909794', 'step': 3608, 'epoch': 1} {'type': 'loss', 'content': 0.1578126698732376, 'timestamp': '2025-09-30 22:16:28.917556', 'step': 3609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:28.951693', 'step': 3609, 'epoch': 1} {'type': 'loss', 'content': 0.14903314411640167, 'timestamp': '2025-09-30 22:16:28.958669', 'step': 3610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:28.992994', 'step': 3610, 'epoch': 1} {'type': 'loss', 'content': 0.25956645607948303, 'timestamp': '2025-09-30 22:16:28.995643', 'step': 3611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:29.026716', 'step': 3611, 'epoch': 1} {'type': 'loss', 'content': 0.19912390410900116, 'timestamp': '2025-09-30 22:16:29.057362', 'step': 3612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:29.095813', 'step': 3612, 'epoch': 1} {'type': 'loss', 'content': 0.15063171088695526, 'timestamp': '2025-09-30 22:16:29.101038', 'step': 3613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.140454', 'step': 3613, 'epoch': 1} {'type': 'loss', 'content': 0.1887400597333908, 'timestamp': '2025-09-30 22:16:29.152316', 'step': 3614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:29.192919', 'step': 3614, 'epoch': 1} {'type': 'loss', 'content': 0.10196220129728317, 'timestamp': '2025-09-30 22:16:29.196138', 'step': 3615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:29.226698', 'step': 3615, 'epoch': 1} {'type': 'loss', 'content': 0.19520477950572968, 'timestamp': '2025-09-30 22:16:29.250961', 'step': 3616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:29.286822', 'step': 3616, 'epoch': 1} {'type': 'loss', 'content': 0.1285654902458191, 'timestamp': '2025-09-30 22:16:29.289621', 'step': 3617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:29.320653', 'step': 3617, 'epoch': 1} {'type': 'loss', 'content': 0.17002597451210022, 'timestamp': '2025-09-30 22:16:29.327516', 'step': 3618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.358208', 'step': 3618, 'epoch': 1} {'type': 'loss', 'content': 0.17430372536182404, 'timestamp': '2025-09-30 22:16:29.366561', 'step': 3619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:29.404397', 'step': 3619, 'epoch': 1} {'type': 'loss', 'content': 0.21745532751083374, 'timestamp': '2025-09-30 22:16:29.433250', 'step': 3620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:29.467551', 'step': 3620, 'epoch': 1} {'type': 'loss', 'content': 0.18306128680706024, 'timestamp': '2025-09-30 22:16:29.473832', 'step': 3621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:29.508611', 'step': 3621, 'epoch': 1} {'type': 'loss', 'content': 0.1219007670879364, 'timestamp': '2025-09-30 22:16:29.511439', 'step': 3622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:29.543293', 'step': 3622, 'epoch': 1} {'type': 'loss', 'content': 0.10429231822490692, 'timestamp': '2025-09-30 22:16:29.549019', 'step': 3623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.583094', 'step': 3623, 'epoch': 1} {'type': 'loss', 'content': 0.2919674813747406, 'timestamp': '2025-09-30 22:16:29.608270', 'step': 3624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.645274', 'step': 3624, 'epoch': 1} {'type': 'loss', 'content': 0.20961694419384003, 'timestamp': '2025-09-30 22:16:29.650355', 'step': 3625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:29.683023', 'step': 3625, 'epoch': 1} {'type': 'loss', 'content': 0.08400371670722961, 'timestamp': '2025-09-30 22:16:29.687331', 'step': 3626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.720849', 'step': 3626, 'epoch': 1} {'type': 'loss', 'content': 0.1640816479921341, 'timestamp': '2025-09-30 22:16:29.723410', 'step': 3627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:29.756707', 'step': 3627, 'epoch': 1} {'type': 'loss', 'content': 0.1277274191379547, 'timestamp': '2025-09-30 22:16:29.787090', 'step': 3628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:29.823089', 'step': 3628, 'epoch': 1} {'type': 'loss', 'content': 0.23392753303050995, 'timestamp': '2025-09-30 22:16:29.825703', 'step': 3629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:29.860746', 'step': 3629, 'epoch': 1} {'type': 'loss', 'content': 0.17470328509807587, 'timestamp': '2025-09-30 22:16:29.863430', 'step': 3630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:29.894596', 'step': 3630, 'epoch': 1} {'type': 'loss', 'content': 0.2028501033782959, 'timestamp': '2025-09-30 22:16:29.899775', 'step': 3631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.932911', 'step': 3631, 'epoch': 1} {'type': 'loss', 'content': 0.17579951882362366, 'timestamp': '2025-09-30 22:16:29.959001', 'step': 3632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:29.991867', 'step': 3632, 'epoch': 1} {'type': 'loss', 'content': 0.23772817850112915, 'timestamp': '2025-09-30 22:16:29.995642', 'step': 3633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:30.027828', 'step': 3633, 'epoch': 1} {'type': 'loss', 'content': 0.1758302003145218, 'timestamp': '2025-09-30 22:16:30.033556', 'step': 3634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:30.063695', 'step': 3634, 'epoch': 1} {'type': 'loss', 'content': 0.10410264134407043, 'timestamp': '2025-09-30 22:16:30.066546', 'step': 3635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.096748', 'step': 3635, 'epoch': 1} {'type': 'loss', 'content': 0.2395186424255371, 'timestamp': '2025-09-30 22:16:30.123360', 'step': 3636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:30.161532', 'step': 3636, 'epoch': 1} {'type': 'loss', 'content': 0.15299735963344574, 'timestamp': '2025-09-30 22:16:30.163810', 'step': 3637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.194112', 'step': 3637, 'epoch': 1} {'type': 'loss', 'content': 0.14215345680713654, 'timestamp': '2025-09-30 22:16:30.197990', 'step': 3638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:30.231485', 'step': 3638, 'epoch': 1} {'type': 'loss', 'content': 0.1673639565706253, 'timestamp': '2025-09-30 22:16:30.236939', 'step': 3639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:30.269472', 'step': 3639, 'epoch': 1} {'type': 'loss', 'content': 0.19767260551452637, 'timestamp': '2025-09-30 22:16:30.293482', 'step': 3640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:30.328397', 'step': 3640, 'epoch': 1} {'type': 'loss', 'content': 0.19919900596141815, 'timestamp': '2025-09-30 22:16:30.330546', 'step': 3641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.364335', 'step': 3641, 'epoch': 1} {'type': 'loss', 'content': 0.17659585177898407, 'timestamp': '2025-09-30 22:16:30.367108', 'step': 3642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:30.398491', 'step': 3642, 'epoch': 1} {'type': 'loss', 'content': 0.14029203355312347, 'timestamp': '2025-09-30 22:16:30.401165', 'step': 3643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:30.438804', 'step': 3643, 'epoch': 1} {'type': 'loss', 'content': 0.10328056663274765, 'timestamp': '2025-09-30 22:16:30.464268', 'step': 3644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:30.498681', 'step': 3644, 'epoch': 1} {'type': 'loss', 'content': 0.08492593467235565, 'timestamp': '2025-09-30 22:16:30.504228', 'step': 3645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:30.536454', 'step': 3645, 'epoch': 1} {'type': 'loss', 'content': 0.13896596431732178, 'timestamp': '2025-09-30 22:16:30.542344', 'step': 3646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:30.575749', 'step': 3646, 'epoch': 1} {'type': 'loss', 'content': 0.17027969658374786, 'timestamp': '2025-09-30 22:16:30.579268', 'step': 3647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:30.612642', 'step': 3647, 'epoch': 1} {'type': 'loss', 'content': 0.2686164677143097, 'timestamp': '2025-09-30 22:16:30.637741', 'step': 3648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:30.670862', 'step': 3648, 'epoch': 1} {'type': 'loss', 'content': 0.18366825580596924, 'timestamp': '2025-09-30 22:16:30.673410', 'step': 3649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:30.706093', 'step': 3649, 'epoch': 1} {'type': 'loss', 'content': 0.28599897027015686, 'timestamp': '2025-09-30 22:16:30.710466', 'step': 3650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.740919', 'step': 3650, 'epoch': 1} {'type': 'loss', 'content': 0.1408168226480484, 'timestamp': '2025-09-30 22:16:30.743169', 'step': 3651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-30 22:16:30.791419', 'step': 3651, 'epoch': 1} {'type': 'loss', 'content': 0.12157707661390305, 'timestamp': '2025-09-30 22:16:30.821863', 'step': 3652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:30.852709', 'step': 3652, 'epoch': 1} {'type': 'loss', 'content': 0.13278695940971375, 'timestamp': '2025-09-30 22:16:30.857662', 'step': 3653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.890924', 'step': 3653, 'epoch': 1} {'type': 'loss', 'content': 0.14508938789367676, 'timestamp': '2025-09-30 22:16:30.895021', 'step': 3654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.928346', 'step': 3654, 'epoch': 1} {'type': 'loss', 'content': 0.12595784664154053, 'timestamp': '2025-09-30 22:16:30.932596', 'step': 3655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:30.965047', 'step': 3655, 'epoch': 1} {'type': 'loss', 'content': 0.1461065262556076, 'timestamp': '2025-09-30 22:16:30.990800', 'step': 3656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:31.024666', 'step': 3656, 'epoch': 1} {'type': 'loss', 'content': 0.19685354828834534, 'timestamp': '2025-09-30 22:16:31.028886', 'step': 3657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.060350', 'step': 3657, 'epoch': 1} {'type': 'loss', 'content': 0.16139177978038788, 'timestamp': '2025-09-30 22:16:31.064630', 'step': 3658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:31.097662', 'step': 3658, 'epoch': 1} {'type': 'loss', 'content': 0.1466737538576126, 'timestamp': '2025-09-30 22:16:31.102353', 'step': 3659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:31.132814', 'step': 3659, 'epoch': 1} {'type': 'loss', 'content': 0.296220064163208, 'timestamp': '2025-09-30 22:16:31.158976', 'step': 3660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.191471', 'step': 3660, 'epoch': 1} {'type': 'loss', 'content': 0.19922029972076416, 'timestamp': '2025-09-30 22:16:31.195764', 'step': 3661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:31.228201', 'step': 3661, 'epoch': 1} {'type': 'loss', 'content': 0.1905093640089035, 'timestamp': '2025-09-30 22:16:31.230585', 'step': 3662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.261825', 'step': 3662, 'epoch': 1} {'type': 'loss', 'content': 0.25597721338272095, 'timestamp': '2025-09-30 22:16:31.266148', 'step': 3663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.300593', 'step': 3663, 'epoch': 1} {'type': 'loss', 'content': 0.15033408999443054, 'timestamp': '2025-09-30 22:16:31.326094', 'step': 3664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:31.358080', 'step': 3664, 'epoch': 1} {'type': 'loss', 'content': 0.13724370300769806, 'timestamp': '2025-09-30 22:16:31.360562', 'step': 3665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:31.391985', 'step': 3665, 'epoch': 1} {'type': 'loss', 'content': 0.2062303125858307, 'timestamp': '2025-09-30 22:16:31.394421', 'step': 3666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:31.424669', 'step': 3666, 'epoch': 1} {'type': 'loss', 'content': 0.1973963975906372, 'timestamp': '2025-09-30 22:16:31.427045', 'step': 3667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.457331', 'step': 3667, 'epoch': 1} {'type': 'loss', 'content': 0.16395309567451477, 'timestamp': '2025-09-30 22:16:31.481522', 'step': 3668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:31.513410', 'step': 3668, 'epoch': 1} {'type': 'loss', 'content': 0.1334484964609146, 'timestamp': '2025-09-30 22:16:31.516690', 'step': 3669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:31.549935', 'step': 3669, 'epoch': 1} {'type': 'loss', 'content': 0.14192751049995422, 'timestamp': '2025-09-30 22:16:31.555046', 'step': 3670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.587512', 'step': 3670, 'epoch': 1} {'type': 'loss', 'content': 0.15019553899765015, 'timestamp': '2025-09-30 22:16:31.597745', 'step': 3671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:31.628866', 'step': 3671, 'epoch': 1} {'type': 'loss', 'content': 0.24752092361450195, 'timestamp': '2025-09-30 22:16:31.654394', 'step': 3672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:31.687528', 'step': 3672, 'epoch': 1} {'type': 'loss', 'content': 0.10254732519388199, 'timestamp': '2025-09-30 22:16:31.691150', 'step': 3673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:31.722852', 'step': 3673, 'epoch': 1} {'type': 'loss', 'content': 0.17170350253582, 'timestamp': '2025-09-30 22:16:31.729349', 'step': 3674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:31.763390', 'step': 3674, 'epoch': 1} {'type': 'loss', 'content': 0.26231512427330017, 'timestamp': '2025-09-30 22:16:31.767361', 'step': 3675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:31.800794', 'step': 3675, 'epoch': 1} {'type': 'loss', 'content': 0.06893021613359451, 'timestamp': '2025-09-30 22:16:31.825208', 'step': 3676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:31.857030', 'step': 3676, 'epoch': 1} {'type': 'loss', 'content': 0.15793481469154358, 'timestamp': '2025-09-30 22:16:31.861008', 'step': 3677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:31.892906', 'step': 3677, 'epoch': 1} {'type': 'loss', 'content': 0.1458524465560913, 'timestamp': '2025-09-30 22:16:31.899397', 'step': 3678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:31.931332', 'step': 3678, 'epoch': 1} {'type': 'loss', 'content': 0.15180468559265137, 'timestamp': '2025-09-30 22:16:31.934516', 'step': 3679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:31.966859', 'step': 3679, 'epoch': 1} {'type': 'loss', 'content': 0.21179291605949402, 'timestamp': '2025-09-30 22:16:31.992472', 'step': 3680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:16:32.024406', 'step': 3680, 'epoch': 1} {'type': 'loss', 'content': 0.14737758040428162, 'timestamp': '2025-09-30 22:16:32.026800', 'step': 3681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.060679', 'step': 3681, 'epoch': 1} {'type': 'loss', 'content': 0.12490326166152954, 'timestamp': '2025-09-30 22:16:32.064798', 'step': 3682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.097052', 'step': 3682, 'epoch': 1} {'type': 'loss', 'content': 0.1511831134557724, 'timestamp': '2025-09-30 22:16:32.100460', 'step': 3683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.132193', 'step': 3683, 'epoch': 1} {'type': 'loss', 'content': 0.15075355768203735, 'timestamp': '2025-09-30 22:16:32.158161', 'step': 3684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:32.193467', 'step': 3684, 'epoch': 1} {'type': 'loss', 'content': 0.1277192085981369, 'timestamp': '2025-09-30 22:16:32.197193', 'step': 3685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.230390', 'step': 3685, 'epoch': 1} {'type': 'loss', 'content': 0.3389779329299927, 'timestamp': '2025-09-30 22:16:32.235656', 'step': 3686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:32.267633', 'step': 3686, 'epoch': 1} {'type': 'loss', 'content': 0.19499239325523376, 'timestamp': '2025-09-30 22:16:32.273909', 'step': 3687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.315236', 'step': 3687, 'epoch': 1} {'type': 'loss', 'content': 0.18592417240142822, 'timestamp': '2025-09-30 22:16:32.340023', 'step': 3688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:32.372110', 'step': 3688, 'epoch': 1} {'type': 'loss', 'content': 0.1891518086194992, 'timestamp': '2025-09-30 22:16:32.376223', 'step': 3689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:32.407243', 'step': 3689, 'epoch': 1} {'type': 'loss', 'content': 0.2217111587524414, 'timestamp': '2025-09-30 22:16:32.414658', 'step': 3690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:32.447521', 'step': 3690, 'epoch': 1} {'type': 'loss', 'content': 0.15793916583061218, 'timestamp': '2025-09-30 22:16:32.451891', 'step': 3691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:32.482562', 'step': 3691, 'epoch': 1} {'type': 'loss', 'content': 0.14281591773033142, 'timestamp': '2025-09-30 22:16:32.507139', 'step': 3692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:32.538661', 'step': 3692, 'epoch': 1} {'type': 'loss', 'content': 0.15209098160266876, 'timestamp': '2025-09-30 22:16:32.542898', 'step': 3693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.574873', 'step': 3693, 'epoch': 1} {'type': 'loss', 'content': 0.2477487176656723, 'timestamp': '2025-09-30 22:16:32.577524', 'step': 3694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.614009', 'step': 3694, 'epoch': 1} {'type': 'loss', 'content': 0.26024049520492554, 'timestamp': '2025-09-30 22:16:32.616991', 'step': 3695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.646798', 'step': 3695, 'epoch': 1} {'type': 'loss', 'content': 0.128802090883255, 'timestamp': '2025-09-30 22:16:32.670649', 'step': 3696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:32.702106', 'step': 3696, 'epoch': 1} {'type': 'loss', 'content': 0.06962724030017853, 'timestamp': '2025-09-30 22:16:32.704979', 'step': 3697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.736469', 'step': 3697, 'epoch': 1} {'type': 'loss', 'content': 0.18871961534023285, 'timestamp': '2025-09-30 22:16:32.740459', 'step': 3698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.779890', 'step': 3698, 'epoch': 1} {'type': 'loss', 'content': 0.12487003952264786, 'timestamp': '2025-09-30 22:16:32.785644', 'step': 3699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:32.817994', 'step': 3699, 'epoch': 1} {'type': 'loss', 'content': 0.073636494576931, 'timestamp': '2025-09-30 22:16:32.842619', 'step': 3700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:32.874982', 'step': 3700, 'epoch': 1} {'type': 'loss', 'content': 0.20699958503246307, 'timestamp': '2025-09-30 22:16:32.881327', 'step': 3701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:32.912945', 'step': 3701, 'epoch': 1} {'type': 'loss', 'content': 0.15180817246437073, 'timestamp': '2025-09-30 22:16:32.918042', 'step': 3702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:32.951264', 'step': 3702, 'epoch': 1} {'type': 'loss', 'content': 0.17837341129779816, 'timestamp': '2025-09-30 22:16:32.954057', 'step': 3703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:32.986852', 'step': 3703, 'epoch': 1} {'type': 'loss', 'content': 0.25954702496528625, 'timestamp': '2025-09-30 22:16:33.020637', 'step': 3704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:33.053095', 'step': 3704, 'epoch': 1} {'type': 'loss', 'content': 0.14779017865657806, 'timestamp': '2025-09-30 22:16:33.055858', 'step': 3705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.086865', 'step': 3705, 'epoch': 1} {'type': 'loss', 'content': 0.13425534963607788, 'timestamp': '2025-09-30 22:16:33.089387', 'step': 3706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.119634', 'step': 3706, 'epoch': 1} {'type': 'loss', 'content': 0.21767498552799225, 'timestamp': '2025-09-30 22:16:33.122009', 'step': 3707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.155478', 'step': 3707, 'epoch': 1} {'type': 'loss', 'content': 0.19788415729999542, 'timestamp': '2025-09-30 22:16:33.181885', 'step': 3708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:33.214603', 'step': 3708, 'epoch': 1} {'type': 'loss', 'content': 0.15523600578308105, 'timestamp': '2025-09-30 22:16:33.218882', 'step': 3709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:33.251882', 'step': 3709, 'epoch': 1} {'type': 'loss', 'content': 0.13101990520954132, 'timestamp': '2025-09-30 22:16:33.255720', 'step': 3710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.286625', 'step': 3710, 'epoch': 1} {'type': 'loss', 'content': 0.19279059767723083, 'timestamp': '2025-09-30 22:16:33.289319', 'step': 3711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:33.321912', 'step': 3711, 'epoch': 1} {'type': 'loss', 'content': 0.14015285670757294, 'timestamp': '2025-09-30 22:16:33.348379', 'step': 3712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.380752', 'step': 3712, 'epoch': 1} {'type': 'loss', 'content': 0.24009951949119568, 'timestamp': '2025-09-30 22:16:33.383014', 'step': 3713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.423129', 'step': 3713, 'epoch': 1} {'type': 'loss', 'content': 0.16901803016662598, 'timestamp': '2025-09-30 22:16:33.425764', 'step': 3714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:33.455918', 'step': 3714, 'epoch': 1} {'type': 'loss', 'content': 0.19933511316776276, 'timestamp': '2025-09-30 22:16:33.458855', 'step': 3715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.489673', 'step': 3715, 'epoch': 1} {'type': 'loss', 'content': 0.2663384675979614, 'timestamp': '2025-09-30 22:16:33.514449', 'step': 3716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.546394', 'step': 3716, 'epoch': 1} {'type': 'loss', 'content': 0.1342332363128662, 'timestamp': '2025-09-30 22:16:33.549608', 'step': 3717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.580159', 'step': 3717, 'epoch': 1} {'type': 'loss', 'content': 0.17465868592262268, 'timestamp': '2025-09-30 22:16:33.585886', 'step': 3718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:33.619185', 'step': 3718, 'epoch': 1} {'type': 'loss', 'content': 0.1645013391971588, 'timestamp': '2025-09-30 22:16:33.625339', 'step': 3719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.656109', 'step': 3719, 'epoch': 1} {'type': 'loss', 'content': 0.1337408572435379, 'timestamp': '2025-09-30 22:16:33.679875', 'step': 3720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:33.718502', 'step': 3720, 'epoch': 1} {'type': 'loss', 'content': 0.1750321388244629, 'timestamp': '2025-09-30 22:16:33.720989', 'step': 3721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.754554', 'step': 3721, 'epoch': 1} {'type': 'loss', 'content': 0.10690078884363174, 'timestamp': '2025-09-30 22:16:33.757341', 'step': 3722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.790105', 'step': 3722, 'epoch': 1} {'type': 'loss', 'content': 0.21112005412578583, 'timestamp': '2025-09-30 22:16:33.793239', 'step': 3723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.822654', 'step': 3723, 'epoch': 1} {'type': 'loss', 'content': 0.20984290540218353, 'timestamp': '2025-09-30 22:16:33.846599', 'step': 3724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.876057', 'step': 3724, 'epoch': 1} {'type': 'loss', 'content': 0.13109040260314941, 'timestamp': '2025-09-30 22:16:33.879270', 'step': 3725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:33.909709', 'step': 3725, 'epoch': 1} {'type': 'loss', 'content': 0.18021078407764435, 'timestamp': '2025-09-30 22:16:33.912247', 'step': 3726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:33.942566', 'step': 3726, 'epoch': 1} {'type': 'loss', 'content': 0.16610296070575714, 'timestamp': '2025-09-30 22:16:33.945058', 'step': 3727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:33.983429', 'step': 3727, 'epoch': 1} {'type': 'loss', 'content': 0.15625497698783875, 'timestamp': '2025-09-30 22:16:34.007629', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:16:41.560675', 'step': 3728, 'epoch': 1} {'type': 'pplx', 'content': 8275.873870825324, 'timestamp': '2025-09-30 22:16:41.564796', 'step': 3728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:41.594840', 'step': 3728, 'epoch': 1} {'type': 'loss', 'content': 0.1280631572008133, 'timestamp': '2025-09-30 22:16:41.598998', 'step': 3729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:41.630818', 'step': 3729, 'epoch': 1} {'type': 'loss', 'content': 0.19255435466766357, 'timestamp': '2025-09-30 22:16:41.636779', 'step': 3730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:41.669830', 'step': 3730, 'epoch': 1} {'type': 'loss', 'content': 0.18397048115730286, 'timestamp': '2025-09-30 22:16:41.674284', 'step': 3731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:41.705266', 'step': 3731, 'epoch': 1} {'type': 'loss', 'content': 0.2868592441082001, 'timestamp': '2025-09-30 22:16:41.729818', 'step': 3732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:41.760292', 'step': 3732, 'epoch': 1} {'type': 'loss', 'content': 0.1754176914691925, 'timestamp': '2025-09-30 22:16:41.762738', 'step': 3733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:41.792760', 'step': 3733, 'epoch': 1} {'type': 'loss', 'content': 0.2061156928539276, 'timestamp': '2025-09-30 22:16:41.795392', 'step': 3734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:41.826975', 'step': 3734, 'epoch': 1} {'type': 'loss', 'content': 0.18345357477664948, 'timestamp': '2025-09-30 22:16:41.831113', 'step': 3735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:41.862035', 'step': 3735, 'epoch': 1} {'type': 'loss', 'content': 0.22357486188411713, 'timestamp': '2025-09-30 22:16:41.885851', 'step': 3736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:41.917467', 'step': 3736, 'epoch': 1} {'type': 'loss', 'content': 0.15767507255077362, 'timestamp': '2025-09-30 22:16:41.922566', 'step': 3737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:41.952255', 'step': 3737, 'epoch': 1} {'type': 'loss', 'content': 0.0782117247581482, 'timestamp': '2025-09-30 22:16:41.955049', 'step': 3738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:41.986955', 'step': 3738, 'epoch': 1} {'type': 'loss', 'content': 0.17502757906913757, 'timestamp': '2025-09-30 22:16:41.989653', 'step': 3739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.020776', 'step': 3739, 'epoch': 1} {'type': 'loss', 'content': 0.19354097545146942, 'timestamp': '2025-09-30 22:16:42.048040', 'step': 3740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.078062', 'step': 3740, 'epoch': 1} {'type': 'loss', 'content': 0.20757269859313965, 'timestamp': '2025-09-30 22:16:42.080136', 'step': 3741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.110372', 'step': 3741, 'epoch': 1} {'type': 'loss', 'content': 0.1060805693268776, 'timestamp': '2025-09-30 22:16:42.115445', 'step': 3742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:42.148172', 'step': 3742, 'epoch': 1} {'type': 'loss', 'content': 0.1481761336326599, 'timestamp': '2025-09-30 22:16:42.150444', 'step': 3743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.182789', 'step': 3743, 'epoch': 1} {'type': 'loss', 'content': 0.2263893336057663, 'timestamp': '2025-09-30 22:16:42.208427', 'step': 3744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:42.239534', 'step': 3744, 'epoch': 1} {'type': 'loss', 'content': 0.1565149873495102, 'timestamp': '2025-09-30 22:16:42.242903', 'step': 3745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:42.276615', 'step': 3745, 'epoch': 1} {'type': 'loss', 'content': 0.13128606975078583, 'timestamp': '2025-09-30 22:16:42.279182', 'step': 3746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:42.323157', 'step': 3746, 'epoch': 1} {'type': 'loss', 'content': 0.14738740026950836, 'timestamp': '2025-09-30 22:16:42.327811', 'step': 3747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:42.361991', 'step': 3747, 'epoch': 1} {'type': 'loss', 'content': 0.14155234396457672, 'timestamp': '2025-09-30 22:16:42.386362', 'step': 3748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.417184', 'step': 3748, 'epoch': 1} {'type': 'loss', 'content': 0.24897746741771698, 'timestamp': '2025-09-30 22:16:42.420117', 'step': 3749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:42.451906', 'step': 3749, 'epoch': 1} {'type': 'loss', 'content': 0.16389822959899902, 'timestamp': '2025-09-30 22:16:42.454641', 'step': 3750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.492285', 'step': 3750, 'epoch': 1} {'type': 'loss', 'content': 0.18951363861560822, 'timestamp': '2025-09-30 22:16:42.496829', 'step': 3751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:42.527777', 'step': 3751, 'epoch': 1} {'type': 'loss', 'content': 0.08928772807121277, 'timestamp': '2025-09-30 22:16:42.552248', 'step': 3752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.582891', 'step': 3752, 'epoch': 1} {'type': 'loss', 'content': 0.18145494163036346, 'timestamp': '2025-09-30 22:16:42.585846', 'step': 3753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.620298', 'step': 3753, 'epoch': 1} {'type': 'loss', 'content': 0.12491071969270706, 'timestamp': '2025-09-30 22:16:42.627524', 'step': 3754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:42.666539', 'step': 3754, 'epoch': 1} {'type': 'loss', 'content': 0.15235137939453125, 'timestamp': '2025-09-30 22:16:42.673630', 'step': 3755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:42.708480', 'step': 3755, 'epoch': 1} {'type': 'loss', 'content': 0.1944771558046341, 'timestamp': '2025-09-30 22:16:42.735643', 'step': 3756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:42.765774', 'step': 3756, 'epoch': 1} {'type': 'loss', 'content': 0.16286605596542358, 'timestamp': '2025-09-30 22:16:42.768020', 'step': 3757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:16:42.798101', 'step': 3757, 'epoch': 1} {'type': 'loss', 'content': 0.2531851530075073, 'timestamp': '2025-09-30 22:16:42.804269', 'step': 3758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:42.838484', 'step': 3758, 'epoch': 1} {'type': 'loss', 'content': 0.1209113597869873, 'timestamp': '2025-09-30 22:16:42.841044', 'step': 3759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:42.871362', 'step': 3759, 'epoch': 1} {'type': 'loss', 'content': 0.0956907719373703, 'timestamp': '2025-09-30 22:16:42.897733', 'step': 3760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:42.930809', 'step': 3760, 'epoch': 1} {'type': 'loss', 'content': 0.13038894534111023, 'timestamp': '2025-09-30 22:16:42.939820', 'step': 3761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:42.970333', 'step': 3761, 'epoch': 1} {'type': 'loss', 'content': 0.12784667313098907, 'timestamp': '2025-09-30 22:16:42.975547', 'step': 3762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.008176', 'step': 3762, 'epoch': 1} {'type': 'loss', 'content': 0.154861718416214, 'timestamp': '2025-09-30 22:16:43.013716', 'step': 3763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.047827', 'step': 3763, 'epoch': 1} {'type': 'loss', 'content': 0.1353437751531601, 'timestamp': '2025-09-30 22:16:43.073983', 'step': 3764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:43.107436', 'step': 3764, 'epoch': 1} {'type': 'loss', 'content': 0.10217958688735962, 'timestamp': '2025-09-30 22:16:43.110440', 'step': 3765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.141265', 'step': 3765, 'epoch': 1} {'type': 'loss', 'content': 0.07350721955299377, 'timestamp': '2025-09-30 22:16:43.144037', 'step': 3766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.178565', 'step': 3766, 'epoch': 1} {'type': 'loss', 'content': 0.12132099270820618, 'timestamp': '2025-09-30 22:16:43.181338', 'step': 3767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:43.212847', 'step': 3767, 'epoch': 1} {'type': 'loss', 'content': 0.2046782523393631, 'timestamp': '2025-09-30 22:16:43.236999', 'step': 3768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:43.271158', 'step': 3768, 'epoch': 1} {'type': 'loss', 'content': 0.18252605199813843, 'timestamp': '2025-09-30 22:16:43.277112', 'step': 3769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:43.311018', 'step': 3769, 'epoch': 1} {'type': 'loss', 'content': 0.1163482740521431, 'timestamp': '2025-09-30 22:16:43.315116', 'step': 3770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.345989', 'step': 3770, 'epoch': 1} {'type': 'loss', 'content': 0.11727192997932434, 'timestamp': '2025-09-30 22:16:43.352244', 'step': 3771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.384865', 'step': 3771, 'epoch': 1} {'type': 'loss', 'content': 0.1285790503025055, 'timestamp': '2025-09-30 22:16:43.409110', 'step': 3772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.444369', 'step': 3772, 'epoch': 1} {'type': 'loss', 'content': 0.17350247502326965, 'timestamp': '2025-09-30 22:16:43.453864', 'step': 3773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.484951', 'step': 3773, 'epoch': 1} {'type': 'loss', 'content': 0.23386070132255554, 'timestamp': '2025-09-30 22:16:43.489666', 'step': 3774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.520328', 'step': 3774, 'epoch': 1} {'type': 'loss', 'content': 0.05940621718764305, 'timestamp': '2025-09-30 22:16:43.525356', 'step': 3775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:43.558602', 'step': 3775, 'epoch': 1} {'type': 'loss', 'content': 0.14364071190357208, 'timestamp': '2025-09-30 22:16:43.582414', 'step': 3776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.613576', 'step': 3776, 'epoch': 1} {'type': 'loss', 'content': 0.16916152834892273, 'timestamp': '2025-09-30 22:16:43.615944', 'step': 3777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.646894', 'step': 3777, 'epoch': 1} {'type': 'loss', 'content': 0.18598181009292603, 'timestamp': '2025-09-30 22:16:43.651817', 'step': 3778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.683105', 'step': 3778, 'epoch': 1} {'type': 'loss', 'content': 0.1205325797200203, 'timestamp': '2025-09-30 22:16:43.685058', 'step': 3779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.715887', 'step': 3779, 'epoch': 1} {'type': 'loss', 'content': 0.13761883974075317, 'timestamp': '2025-09-30 22:16:43.739861', 'step': 3780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.776136', 'step': 3780, 'epoch': 1} {'type': 'loss', 'content': 0.1396421194076538, 'timestamp': '2025-09-30 22:16:43.778331', 'step': 3781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:43.809891', 'step': 3781, 'epoch': 1} {'type': 'loss', 'content': 0.09940320253372192, 'timestamp': '2025-09-30 22:16:43.812589', 'step': 3782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.846084', 'step': 3782, 'epoch': 1} {'type': 'loss', 'content': 0.19324526190757751, 'timestamp': '2025-09-30 22:16:43.852258', 'step': 3783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.887060', 'step': 3783, 'epoch': 1} {'type': 'loss', 'content': 0.11671096086502075, 'timestamp': '2025-09-30 22:16:43.910814', 'step': 3784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:43.941219', 'step': 3784, 'epoch': 1} {'type': 'loss', 'content': 0.18853558599948883, 'timestamp': '2025-09-30 22:16:43.944635', 'step': 3785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:43.977169', 'step': 3785, 'epoch': 1} {'type': 'loss', 'content': 0.09357260167598724, 'timestamp': '2025-09-30 22:16:43.979763', 'step': 3786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.011095', 'step': 3786, 'epoch': 1} {'type': 'loss', 'content': 0.14602358639240265, 'timestamp': '2025-09-30 22:16:44.013440', 'step': 3787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:44.046264', 'step': 3787, 'epoch': 1} {'type': 'loss', 'content': 0.16079404950141907, 'timestamp': '2025-09-30 22:16:44.071725', 'step': 3788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:44.102680', 'step': 3788, 'epoch': 1} {'type': 'loss', 'content': 0.1967417299747467, 'timestamp': '2025-09-30 22:16:44.105311', 'step': 3789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:44.152891', 'step': 3789, 'epoch': 1} {'type': 'loss', 'content': 0.14708806574344635, 'timestamp': '2025-09-30 22:16:44.155339', 'step': 3790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:44.187999', 'step': 3790, 'epoch': 1} {'type': 'loss', 'content': 0.1500082165002823, 'timestamp': '2025-09-30 22:16:44.191530', 'step': 3791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.222786', 'step': 3791, 'epoch': 1} {'type': 'loss', 'content': 0.16424886882305145, 'timestamp': '2025-09-30 22:16:44.247012', 'step': 3792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:44.285377', 'step': 3792, 'epoch': 1} {'type': 'loss', 'content': 0.25060516595840454, 'timestamp': '2025-09-30 22:16:44.290276', 'step': 3793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:44.322617', 'step': 3793, 'epoch': 1} {'type': 'loss', 'content': 0.17755191028118134, 'timestamp': '2025-09-30 22:16:44.329347', 'step': 3794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.360574', 'step': 3794, 'epoch': 1} {'type': 'loss', 'content': 0.21398478746414185, 'timestamp': '2025-09-30 22:16:44.364609', 'step': 3795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.396954', 'step': 3795, 'epoch': 1} {'type': 'loss', 'content': 0.14907725155353546, 'timestamp': '2025-09-30 22:16:44.431398', 'step': 3796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.464265', 'step': 3796, 'epoch': 1} {'type': 'loss', 'content': 0.07056176662445068, 'timestamp': '2025-09-30 22:16:44.466860', 'step': 3797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:44.500778', 'step': 3797, 'epoch': 1} {'type': 'loss', 'content': 0.17093710601329803, 'timestamp': '2025-09-30 22:16:44.508005', 'step': 3798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.541776', 'step': 3798, 'epoch': 1} {'type': 'loss', 'content': 0.2792108356952667, 'timestamp': '2025-09-30 22:16:44.549598', 'step': 3799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.585691', 'step': 3799, 'epoch': 1} {'type': 'loss', 'content': 0.07967677712440491, 'timestamp': '2025-09-30 22:16:44.611901', 'step': 3800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.646471', 'step': 3800, 'epoch': 1} {'type': 'loss', 'content': 0.11487335711717606, 'timestamp': '2025-09-30 22:16:44.648692', 'step': 3801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.682236', 'step': 3801, 'epoch': 1} {'type': 'loss', 'content': 0.1269044727087021, 'timestamp': '2025-09-30 22:16:44.686979', 'step': 3802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.719762', 'step': 3802, 'epoch': 1} {'type': 'loss', 'content': 0.1198916882276535, 'timestamp': '2025-09-30 22:16:44.722995', 'step': 3803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.755169', 'step': 3803, 'epoch': 1} {'type': 'loss', 'content': 0.1879972368478775, 'timestamp': '2025-09-30 22:16:44.779103', 'step': 3804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.809099', 'step': 3804, 'epoch': 1} {'type': 'loss', 'content': 0.15515102446079254, 'timestamp': '2025-09-30 22:16:44.811494', 'step': 3805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.842326', 'step': 3805, 'epoch': 1} {'type': 'loss', 'content': 0.13739526271820068, 'timestamp': '2025-09-30 22:16:44.844677', 'step': 3806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:44.875521', 'step': 3806, 'epoch': 1} {'type': 'loss', 'content': 0.23554059863090515, 'timestamp': '2025-09-30 22:16:44.877721', 'step': 3807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:44.912872', 'step': 3807, 'epoch': 1} {'type': 'loss', 'content': 0.083995521068573, 'timestamp': '2025-09-30 22:16:44.940723', 'step': 3808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:44.975584', 'step': 3808, 'epoch': 1} {'type': 'loss', 'content': 0.15639826655387878, 'timestamp': '2025-09-30 22:16:44.978531', 'step': 3809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:45.009500', 'step': 3809, 'epoch': 1} {'type': 'loss', 'content': 0.17074152827262878, 'timestamp': '2025-09-30 22:16:45.012018', 'step': 3810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:45.043546', 'step': 3810, 'epoch': 1} {'type': 'loss', 'content': 0.13105764985084534, 'timestamp': '2025-09-30 22:16:45.049236', 'step': 3811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.080488', 'step': 3811, 'epoch': 1} {'type': 'loss', 'content': 0.19361333549022675, 'timestamp': '2025-09-30 22:16:45.107007', 'step': 3812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.140065', 'step': 3812, 'epoch': 1} {'type': 'loss', 'content': 0.16310033202171326, 'timestamp': '2025-09-30 22:16:45.145319', 'step': 3813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.176440', 'step': 3813, 'epoch': 1} {'type': 'loss', 'content': 0.14847174286842346, 'timestamp': '2025-09-30 22:16:45.183387', 'step': 3814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.216945', 'step': 3814, 'epoch': 1} {'type': 'loss', 'content': 0.11608218401670456, 'timestamp': '2025-09-30 22:16:45.222489', 'step': 3815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:45.253958', 'step': 3815, 'epoch': 1} {'type': 'loss', 'content': 0.2180759459733963, 'timestamp': '2025-09-30 22:16:45.280438', 'step': 3816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.312619', 'step': 3816, 'epoch': 1} {'type': 'loss', 'content': 0.15345367789268494, 'timestamp': '2025-09-30 22:16:45.316297', 'step': 3817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:45.347990', 'step': 3817, 'epoch': 1} {'type': 'loss', 'content': 0.1019454374909401, 'timestamp': '2025-09-30 22:16:45.350380', 'step': 3818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:45.384385', 'step': 3818, 'epoch': 1} {'type': 'loss', 'content': 0.1699514538049698, 'timestamp': '2025-09-30 22:16:45.387310', 'step': 3819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.417836', 'step': 3819, 'epoch': 1} {'type': 'loss', 'content': 0.15577779710292816, 'timestamp': '2025-09-30 22:16:45.441649', 'step': 3820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.471621', 'step': 3820, 'epoch': 1} {'type': 'loss', 'content': 0.10859446227550507, 'timestamp': '2025-09-30 22:16:45.475608', 'step': 3821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.506160', 'step': 3821, 'epoch': 1} {'type': 'loss', 'content': 0.17976367473602295, 'timestamp': '2025-09-30 22:16:45.508282', 'step': 3822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.540113', 'step': 3822, 'epoch': 1} {'type': 'loss', 'content': 0.09955506026744843, 'timestamp': '2025-09-30 22:16:45.542899', 'step': 3823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.574546', 'step': 3823, 'epoch': 1} {'type': 'loss', 'content': 0.20131385326385498, 'timestamp': '2025-09-30 22:16:45.604754', 'step': 3824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.636535', 'step': 3824, 'epoch': 1} {'type': 'loss', 'content': 0.10438505560159683, 'timestamp': '2025-09-30 22:16:45.638738', 'step': 3825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:45.670560', 'step': 3825, 'epoch': 1} {'type': 'loss', 'content': 0.18432217836380005, 'timestamp': '2025-09-30 22:16:45.673590', 'step': 3826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:45.709233', 'step': 3826, 'epoch': 1} {'type': 'loss', 'content': 0.13370954990386963, 'timestamp': '2025-09-30 22:16:45.713593', 'step': 3827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.749029', 'step': 3827, 'epoch': 1} {'type': 'loss', 'content': 0.20234300196170807, 'timestamp': '2025-09-30 22:16:45.779118', 'step': 3828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:45.810305', 'step': 3828, 'epoch': 1} {'type': 'loss', 'content': 0.14908304810523987, 'timestamp': '2025-09-30 22:16:45.814402', 'step': 3829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.846817', 'step': 3829, 'epoch': 1} {'type': 'loss', 'content': 0.10269883275032043, 'timestamp': '2025-09-30 22:16:45.849310', 'step': 3830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.879897', 'step': 3830, 'epoch': 1} {'type': 'loss', 'content': 0.15141813457012177, 'timestamp': '2025-09-30 22:16:45.889294', 'step': 3831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:45.928772', 'step': 3831, 'epoch': 1} {'type': 'loss', 'content': 0.2125098556280136, 'timestamp': '2025-09-30 22:16:45.953522', 'step': 3832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:45.984429', 'step': 3832, 'epoch': 1} {'type': 'loss', 'content': 0.19123387336730957, 'timestamp': '2025-09-30 22:16:45.986728', 'step': 3833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.021100', 'step': 3833, 'epoch': 1} {'type': 'loss', 'content': 0.2026839256286621, 'timestamp': '2025-09-30 22:16:46.024295', 'step': 3834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.054773', 'step': 3834, 'epoch': 1} {'type': 'loss', 'content': 0.22078585624694824, 'timestamp': '2025-09-30 22:16:46.057926', 'step': 3835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.093287', 'step': 3835, 'epoch': 1} {'type': 'loss', 'content': 0.15526153147220612, 'timestamp': '2025-09-30 22:16:46.119529', 'step': 3836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.150162', 'step': 3836, 'epoch': 1} {'type': 'loss', 'content': 0.1778767853975296, 'timestamp': '2025-09-30 22:16:46.161632', 'step': 3837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.195786', 'step': 3837, 'epoch': 1} {'type': 'loss', 'content': 0.10853828489780426, 'timestamp': '2025-09-30 22:16:46.198286', 'step': 3838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.230538', 'step': 3838, 'epoch': 1} {'type': 'loss', 'content': 0.1774042397737503, 'timestamp': '2025-09-30 22:16:46.240840', 'step': 3839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.274772', 'step': 3839, 'epoch': 1} {'type': 'loss', 'content': 0.1827893853187561, 'timestamp': '2025-09-30 22:16:46.299698', 'step': 3840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:46.331985', 'step': 3840, 'epoch': 1} {'type': 'loss', 'content': 0.2026434987783432, 'timestamp': '2025-09-30 22:16:46.346233', 'step': 3841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.382208', 'step': 3841, 'epoch': 1} {'type': 'loss', 'content': 0.2060280591249466, 'timestamp': '2025-09-30 22:16:46.385136', 'step': 3842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.420556', 'step': 3842, 'epoch': 1} {'type': 'loss', 'content': 0.19990694522857666, 'timestamp': '2025-09-30 22:16:46.423996', 'step': 3843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:46.455175', 'step': 3843, 'epoch': 1} {'type': 'loss', 'content': 0.18553344905376434, 'timestamp': '2025-09-30 22:16:46.479453', 'step': 3844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.510129', 'step': 3844, 'epoch': 1} {'type': 'loss', 'content': 0.2544058561325073, 'timestamp': '2025-09-30 22:16:46.512875', 'step': 3845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:46.547026', 'step': 3845, 'epoch': 1} {'type': 'loss', 'content': 0.18609300255775452, 'timestamp': '2025-09-30 22:16:46.553677', 'step': 3846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.591086', 'step': 3846, 'epoch': 1} {'type': 'loss', 'content': 0.28485745191574097, 'timestamp': '2025-09-30 22:16:46.599344', 'step': 3847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.636214', 'step': 3847, 'epoch': 1} {'type': 'loss', 'content': 0.2502376437187195, 'timestamp': '2025-09-30 22:16:46.663119', 'step': 3848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.702078', 'step': 3848, 'epoch': 1} {'type': 'loss', 'content': 0.18907330930233002, 'timestamp': '2025-09-30 22:16:46.707524', 'step': 3849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.747430', 'step': 3849, 'epoch': 1} {'type': 'loss', 'content': 0.16607896983623505, 'timestamp': '2025-09-30 22:16:46.749944', 'step': 3850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.782997', 'step': 3850, 'epoch': 1} {'type': 'loss', 'content': 0.1963794231414795, 'timestamp': '2025-09-30 22:16:46.791449', 'step': 3851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:46.832324', 'step': 3851, 'epoch': 1} {'type': 'loss', 'content': 0.1914205104112625, 'timestamp': '2025-09-30 22:16:46.858788', 'step': 3852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:46.889076', 'step': 3852, 'epoch': 1} {'type': 'loss', 'content': 0.18790040910243988, 'timestamp': '2025-09-30 22:16:46.896496', 'step': 3853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:46.936640', 'step': 3853, 'epoch': 1} {'type': 'loss', 'content': 0.16760937869548798, 'timestamp': '2025-09-30 22:16:46.939714', 'step': 3854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:46.971278', 'step': 3854, 'epoch': 1} {'type': 'loss', 'content': 0.1746075600385666, 'timestamp': '2025-09-30 22:16:46.974348', 'step': 3855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.013126', 'step': 3855, 'epoch': 1} {'type': 'loss', 'content': 0.18071553111076355, 'timestamp': '2025-09-30 22:16:47.042073', 'step': 3856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.077444', 'step': 3856, 'epoch': 1} {'type': 'loss', 'content': 0.15430179238319397, 'timestamp': '2025-09-30 22:16:47.083357', 'step': 3857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.120866', 'step': 3857, 'epoch': 1} {'type': 'loss', 'content': 0.15942366421222687, 'timestamp': '2025-09-30 22:16:47.124340', 'step': 3858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:47.155695', 'step': 3858, 'epoch': 1} {'type': 'loss', 'content': 0.12488314509391785, 'timestamp': '2025-09-30 22:16:47.158720', 'step': 3859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:47.190591', 'step': 3859, 'epoch': 1} {'type': 'loss', 'content': 0.09860138595104218, 'timestamp': '2025-09-30 22:16:47.218395', 'step': 3860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.251131', 'step': 3860, 'epoch': 1} {'type': 'loss', 'content': 0.23918908834457397, 'timestamp': '2025-09-30 22:16:47.255490', 'step': 3861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.290732', 'step': 3861, 'epoch': 1} {'type': 'loss', 'content': 0.1697978377342224, 'timestamp': '2025-09-30 22:16:47.298824', 'step': 3862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:47.335949', 'step': 3862, 'epoch': 1} {'type': 'loss', 'content': 0.13072629272937775, 'timestamp': '2025-09-30 22:16:47.344560', 'step': 3863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:47.380908', 'step': 3863, 'epoch': 1} {'type': 'loss', 'content': 0.08775485306978226, 'timestamp': '2025-09-30 22:16:47.405245', 'step': 3864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:47.444327', 'step': 3864, 'epoch': 1} {'type': 'loss', 'content': 0.2157089114189148, 'timestamp': '2025-09-30 22:16:47.446909', 'step': 3865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:47.477332', 'step': 3865, 'epoch': 1} {'type': 'loss', 'content': 0.24687261879444122, 'timestamp': '2025-09-30 22:16:47.484415', 'step': 3866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.521132', 'step': 3866, 'epoch': 1} {'type': 'loss', 'content': 0.1494709998369217, 'timestamp': '2025-09-30 22:16:47.528146', 'step': 3867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:47.558557', 'step': 3867, 'epoch': 1} {'type': 'loss', 'content': 0.11231246590614319, 'timestamp': '2025-09-30 22:16:47.582662', 'step': 3868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.614184', 'step': 3868, 'epoch': 1} {'type': 'loss', 'content': 0.2024535983800888, 'timestamp': '2025-09-30 22:16:47.624162', 'step': 3869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:47.659516', 'step': 3869, 'epoch': 1} {'type': 'loss', 'content': 0.18784715235233307, 'timestamp': '2025-09-30 22:16:47.666345', 'step': 3870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:47.702031', 'step': 3870, 'epoch': 1} {'type': 'loss', 'content': 0.12651999294757843, 'timestamp': '2025-09-30 22:16:47.705906', 'step': 3871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:47.740495', 'step': 3871, 'epoch': 1} {'type': 'loss', 'content': 0.2585206627845764, 'timestamp': '2025-09-30 22:16:47.764955', 'step': 3872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:47.799057', 'step': 3872, 'epoch': 1} {'type': 'loss', 'content': 0.2645137310028076, 'timestamp': '2025-09-30 22:16:47.802708', 'step': 3873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:47.833794', 'step': 3873, 'epoch': 1} {'type': 'loss', 'content': 0.14119546115398407, 'timestamp': '2025-09-30 22:16:47.840997', 'step': 3874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:47.879533', 'step': 3874, 'epoch': 1} {'type': 'loss', 'content': 0.09681424498558044, 'timestamp': '2025-09-30 22:16:47.896009', 'step': 3875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:47.926540', 'step': 3875, 'epoch': 1} {'type': 'loss', 'content': 0.12558311223983765, 'timestamp': '2025-09-30 22:16:47.950869', 'step': 3876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:47.988805', 'step': 3876, 'epoch': 1} {'type': 'loss', 'content': 0.1269015371799469, 'timestamp': '2025-09-30 22:16:47.997578', 'step': 3877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.029577', 'step': 3877, 'epoch': 1} {'type': 'loss', 'content': 0.11958079040050507, 'timestamp': '2025-09-30 22:16:48.032835', 'step': 3878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:48.063600', 'step': 3878, 'epoch': 1} {'type': 'loss', 'content': 0.2165294885635376, 'timestamp': '2025-09-30 22:16:48.066365', 'step': 3879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:48.096918', 'step': 3879, 'epoch': 1} {'type': 'loss', 'content': 0.1078471764922142, 'timestamp': '2025-09-30 22:16:48.121055', 'step': 3880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:48.157534', 'step': 3880, 'epoch': 1} {'type': 'loss', 'content': 0.20303665101528168, 'timestamp': '2025-09-30 22:16:48.161092', 'step': 3881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.198779', 'step': 3881, 'epoch': 1} {'type': 'loss', 'content': 0.17056173086166382, 'timestamp': '2025-09-30 22:16:48.208342', 'step': 3882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.245084', 'step': 3882, 'epoch': 1} {'type': 'loss', 'content': 0.12849029898643494, 'timestamp': '2025-09-30 22:16:48.253413', 'step': 3883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.284562', 'step': 3883, 'epoch': 1} {'type': 'loss', 'content': 0.1946517676115036, 'timestamp': '2025-09-30 22:16:48.314918', 'step': 3884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.364160', 'step': 3884, 'epoch': 1} {'type': 'loss', 'content': 0.2299448549747467, 'timestamp': '2025-09-30 22:16:48.367506', 'step': 3885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.399063', 'step': 3885, 'epoch': 1} {'type': 'loss', 'content': 0.12585151195526123, 'timestamp': '2025-09-30 22:16:48.402759', 'step': 3886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:48.433336', 'step': 3886, 'epoch': 1} {'type': 'loss', 'content': 0.1438792645931244, 'timestamp': '2025-09-30 22:16:48.435754', 'step': 3887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.474590', 'step': 3887, 'epoch': 1} {'type': 'loss', 'content': 0.18184250593185425, 'timestamp': '2025-09-30 22:16:48.503904', 'step': 3888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.544149', 'step': 3888, 'epoch': 1} {'type': 'loss', 'content': 0.2071993201971054, 'timestamp': '2025-09-30 22:16:48.551676', 'step': 3889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.593641', 'step': 3889, 'epoch': 1} {'type': 'loss', 'content': 0.18088307976722717, 'timestamp': '2025-09-30 22:16:48.596213', 'step': 3890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.632693', 'step': 3890, 'epoch': 1} {'type': 'loss', 'content': 0.18592551350593567, 'timestamp': '2025-09-30 22:16:48.635232', 'step': 3891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:48.666216', 'step': 3891, 'epoch': 1} {'type': 'loss', 'content': 0.1910654902458191, 'timestamp': '2025-09-30 22:16:48.690878', 'step': 3892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.723217', 'step': 3892, 'epoch': 1} {'type': 'loss', 'content': 0.12215204536914825, 'timestamp': '2025-09-30 22:16:48.725636', 'step': 3893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:48.761656', 'step': 3893, 'epoch': 1} {'type': 'loss', 'content': 0.15648126602172852, 'timestamp': '2025-09-30 22:16:48.764257', 'step': 3894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:48.799437', 'step': 3894, 'epoch': 1} {'type': 'loss', 'content': 0.15457195043563843, 'timestamp': '2025-09-30 22:16:48.809245', 'step': 3895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.843794', 'step': 3895, 'epoch': 1} {'type': 'loss', 'content': 0.14651775360107422, 'timestamp': '2025-09-30 22:16:48.873194', 'step': 3896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:48.903774', 'step': 3896, 'epoch': 1} {'type': 'loss', 'content': 0.11228183656930923, 'timestamp': '2025-09-30 22:16:48.910286', 'step': 3897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:48.947251', 'step': 3897, 'epoch': 1} {'type': 'loss', 'content': 0.1551772952079773, 'timestamp': '2025-09-30 22:16:48.956585', 'step': 3898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:48.997524', 'step': 3898, 'epoch': 1} {'type': 'loss', 'content': 0.21975615620613098, 'timestamp': '2025-09-30 22:16:49.002786', 'step': 3899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:49.036534', 'step': 3899, 'epoch': 1} {'type': 'loss', 'content': 0.1746671050786972, 'timestamp': '2025-09-30 22:16:49.061596', 'step': 3900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:49.092554', 'step': 3900, 'epoch': 1} {'type': 'loss', 'content': 0.19885987043380737, 'timestamp': '2025-09-30 22:16:49.095934', 'step': 3901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:49.127489', 'step': 3901, 'epoch': 1} {'type': 'loss', 'content': 0.10454394668340683, 'timestamp': '2025-09-30 22:16:49.130838', 'step': 3902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:49.162138', 'step': 3902, 'epoch': 1} {'type': 'loss', 'content': 0.1460525393486023, 'timestamp': '2025-09-30 22:16:49.165467', 'step': 3903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:49.196850', 'step': 3903, 'epoch': 1} {'type': 'loss', 'content': 0.13066308200359344, 'timestamp': '2025-09-30 22:16:49.221328', 'step': 3904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.260333', 'step': 3904, 'epoch': 1} {'type': 'loss', 'content': 0.18352936208248138, 'timestamp': '2025-09-30 22:16:49.263238', 'step': 3905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:49.293745', 'step': 3905, 'epoch': 1} {'type': 'loss', 'content': 0.14099107682704926, 'timestamp': '2025-09-30 22:16:49.296710', 'step': 3906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:49.329560', 'step': 3906, 'epoch': 1} {'type': 'loss', 'content': 0.180861234664917, 'timestamp': '2025-09-30 22:16:49.345037', 'step': 3907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:49.377044', 'step': 3907, 'epoch': 1} {'type': 'loss', 'content': 0.11241436004638672, 'timestamp': '2025-09-30 22:16:49.401577', 'step': 3908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:49.432809', 'step': 3908, 'epoch': 1} {'type': 'loss', 'content': 0.19732432067394257, 'timestamp': '2025-09-30 22:16:49.440028', 'step': 3909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.475878', 'step': 3909, 'epoch': 1} {'type': 'loss', 'content': 0.17866981029510498, 'timestamp': '2025-09-30 22:16:49.478559', 'step': 3910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.512726', 'step': 3910, 'epoch': 1} {'type': 'loss', 'content': 0.15810216963291168, 'timestamp': '2025-09-30 22:16:49.515475', 'step': 3911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:49.550131', 'step': 3911, 'epoch': 1} {'type': 'loss', 'content': 0.1419801115989685, 'timestamp': '2025-09-30 22:16:49.578732', 'step': 3912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.608892', 'step': 3912, 'epoch': 1} {'type': 'loss', 'content': 0.1360234171152115, 'timestamp': '2025-09-30 22:16:49.611975', 'step': 3913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:49.643379', 'step': 3913, 'epoch': 1} {'type': 'loss', 'content': 0.1339498907327652, 'timestamp': '2025-09-30 22:16:49.650752', 'step': 3914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.686679', 'step': 3914, 'epoch': 1} {'type': 'loss', 'content': 0.18493981659412384, 'timestamp': '2025-09-30 22:16:49.689171', 'step': 3915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:49.721217', 'step': 3915, 'epoch': 1} {'type': 'loss', 'content': 0.10566464811563492, 'timestamp': '2025-09-30 22:16:49.749122', 'step': 3916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:49.780037', 'step': 3916, 'epoch': 1} {'type': 'loss', 'content': 0.23218011856079102, 'timestamp': '2025-09-30 22:16:49.782343', 'step': 3917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:49.814394', 'step': 3917, 'epoch': 1} {'type': 'loss', 'content': 0.24747203290462494, 'timestamp': '2025-09-30 22:16:49.817026', 'step': 3918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.847906', 'step': 3918, 'epoch': 1} {'type': 'loss', 'content': 0.2630103528499603, 'timestamp': '2025-09-30 22:16:49.850827', 'step': 3919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:49.887633', 'step': 3919, 'epoch': 1} {'type': 'loss', 'content': 0.1905900239944458, 'timestamp': '2025-09-30 22:16:49.913090', 'step': 3920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:49.948282', 'step': 3920, 'epoch': 1} {'type': 'loss', 'content': 0.12511293590068817, 'timestamp': '2025-09-30 22:16:49.954946', 'step': 3921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:49.994662', 'step': 3921, 'epoch': 1} {'type': 'loss', 'content': 0.19538509845733643, 'timestamp': '2025-09-30 22:16:49.997989', 'step': 3922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:50.037657', 'step': 3922, 'epoch': 1} {'type': 'loss', 'content': 0.13238918781280518, 'timestamp': '2025-09-30 22:16:50.040513', 'step': 3923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.071294', 'step': 3923, 'epoch': 1} {'type': 'loss', 'content': 0.13189780712127686, 'timestamp': '2025-09-30 22:16:50.098872', 'step': 3924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.129431', 'step': 3924, 'epoch': 1} {'type': 'loss', 'content': 0.11927410960197449, 'timestamp': '2025-09-30 22:16:50.136909', 'step': 3925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.169138', 'step': 3925, 'epoch': 1} {'type': 'loss', 'content': 0.18569907546043396, 'timestamp': '2025-09-30 22:16:50.171992', 'step': 3926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.203438', 'step': 3926, 'epoch': 1} {'type': 'loss', 'content': 0.1581237018108368, 'timestamp': '2025-09-30 22:16:50.206052', 'step': 3927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.263206', 'step': 3927, 'epoch': 1} {'type': 'loss', 'content': 0.16648538410663605, 'timestamp': '2025-09-30 22:16:50.287939', 'step': 3928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.319388', 'step': 3928, 'epoch': 1} {'type': 'loss', 'content': 0.12283699214458466, 'timestamp': '2025-09-30 22:16:50.329773', 'step': 3929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:50.364565', 'step': 3929, 'epoch': 1} {'type': 'loss', 'content': 0.12395458668470383, 'timestamp': '2025-09-30 22:16:50.371442', 'step': 3930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.403492', 'step': 3930, 'epoch': 1} {'type': 'loss', 'content': 0.23276545107364655, 'timestamp': '2025-09-30 22:16:50.406315', 'step': 3931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.437865', 'step': 3931, 'epoch': 1} {'type': 'loss', 'content': 0.14853332936763763, 'timestamp': '2025-09-30 22:16:50.468429', 'step': 3932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:50.506073', 'step': 3932, 'epoch': 1} {'type': 'loss', 'content': 0.16650325059890747, 'timestamp': '2025-09-30 22:16:50.512886', 'step': 3933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.543750', 'step': 3933, 'epoch': 1} {'type': 'loss', 'content': 0.130880668759346, 'timestamp': '2025-09-30 22:16:50.550229', 'step': 3934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.584825', 'step': 3934, 'epoch': 1} {'type': 'loss', 'content': 0.221500962972641, 'timestamp': '2025-09-30 22:16:50.587911', 'step': 3935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.622876', 'step': 3935, 'epoch': 1} {'type': 'loss', 'content': 0.0936082974076271, 'timestamp': '2025-09-30 22:16:50.647081', 'step': 3936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.677500', 'step': 3936, 'epoch': 1} {'type': 'loss', 'content': 0.15051694214344025, 'timestamp': '2025-09-30 22:16:50.681048', 'step': 3937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:50.711487', 'step': 3937, 'epoch': 1} {'type': 'loss', 'content': 0.16743770241737366, 'timestamp': '2025-09-30 22:16:50.713860', 'step': 3938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:50.746911', 'step': 3938, 'epoch': 1} {'type': 'loss', 'content': 0.19143420457839966, 'timestamp': '2025-09-30 22:16:50.751259', 'step': 3939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:50.786252', 'step': 3939, 'epoch': 1} {'type': 'loss', 'content': 0.18919232487678528, 'timestamp': '2025-09-30 22:16:50.815473', 'step': 3940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:50.849034', 'step': 3940, 'epoch': 1} {'type': 'loss', 'content': 0.21953335404396057, 'timestamp': '2025-09-30 22:16:50.860110', 'step': 3941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.893346', 'step': 3941, 'epoch': 1} {'type': 'loss', 'content': 0.21391472220420837, 'timestamp': '2025-09-30 22:16:50.896771', 'step': 3942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:50.928914', 'step': 3942, 'epoch': 1} {'type': 'loss', 'content': 0.19085319340229034, 'timestamp': '2025-09-30 22:16:50.935052', 'step': 3943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:50.969860', 'step': 3943, 'epoch': 1} {'type': 'loss', 'content': 0.20830711722373962, 'timestamp': '2025-09-30 22:16:50.996703', 'step': 3944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:51.027506', 'step': 3944, 'epoch': 1} {'type': 'loss', 'content': 0.18583667278289795, 'timestamp': '2025-09-30 22:16:51.033966', 'step': 3945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:51.068554', 'step': 3945, 'epoch': 1} {'type': 'loss', 'content': 0.19964514672756195, 'timestamp': '2025-09-30 22:16:51.071223', 'step': 3946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:51.101401', 'step': 3946, 'epoch': 1} {'type': 'loss', 'content': 0.16761913895606995, 'timestamp': '2025-09-30 22:16:51.107391', 'step': 3947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.139943', 'step': 3947, 'epoch': 1} {'type': 'loss', 'content': 0.2255319505929947, 'timestamp': '2025-09-30 22:16:51.165248', 'step': 3948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:51.200506', 'step': 3948, 'epoch': 1} {'type': 'loss', 'content': 0.13994200527668, 'timestamp': '2025-09-30 22:16:51.207556', 'step': 3949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.243370', 'step': 3949, 'epoch': 1} {'type': 'loss', 'content': 0.16265399754047394, 'timestamp': '2025-09-30 22:16:51.246247', 'step': 3950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.277072', 'step': 3950, 'epoch': 1} {'type': 'loss', 'content': 0.323369562625885, 'timestamp': '2025-09-30 22:16:51.279890', 'step': 3951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:51.310804', 'step': 3951, 'epoch': 1} {'type': 'loss', 'content': 0.15379677712917328, 'timestamp': '2025-09-30 22:16:51.339252', 'step': 3952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.370547', 'step': 3952, 'epoch': 1} {'type': 'loss', 'content': 0.23208589851856232, 'timestamp': '2025-09-30 22:16:51.378270', 'step': 3953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:51.409334', 'step': 3953, 'epoch': 1} {'type': 'loss', 'content': 0.12793095409870148, 'timestamp': '2025-09-30 22:16:51.417279', 'step': 3954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:51.452420', 'step': 3954, 'epoch': 1} {'type': 'loss', 'content': 0.11297771334648132, 'timestamp': '2025-09-30 22:16:51.460085', 'step': 3955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.492283', 'step': 3955, 'epoch': 1} {'type': 'loss', 'content': 0.09154194593429565, 'timestamp': '2025-09-30 22:16:51.521294', 'step': 3956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:51.552304', 'step': 3956, 'epoch': 1} {'type': 'loss', 'content': 0.12304475903511047, 'timestamp': '2025-09-30 22:16:51.555977', 'step': 3957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:51.587260', 'step': 3957, 'epoch': 1} {'type': 'loss', 'content': 0.1842927187681198, 'timestamp': '2025-09-30 22:16:51.593494', 'step': 3958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.627829', 'step': 3958, 'epoch': 1} {'type': 'loss', 'content': 0.18766848742961884, 'timestamp': '2025-09-30 22:16:51.630652', 'step': 3959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:51.661740', 'step': 3959, 'epoch': 1} {'type': 'loss', 'content': 0.10437029600143433, 'timestamp': '2025-09-30 22:16:51.685692', 'step': 3960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:51.720277', 'step': 3960, 'epoch': 1} {'type': 'loss', 'content': 0.21506160497665405, 'timestamp': '2025-09-30 22:16:51.723169', 'step': 3961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.756811', 'step': 3961, 'epoch': 1} {'type': 'loss', 'content': 0.17581996321678162, 'timestamp': '2025-09-30 22:16:51.759476', 'step': 3962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:51.791633', 'step': 3962, 'epoch': 1} {'type': 'loss', 'content': 0.18091411888599396, 'timestamp': '2025-09-30 22:16:51.794475', 'step': 3963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:51.824512', 'step': 3963, 'epoch': 1} {'type': 'loss', 'content': 0.13188306987285614, 'timestamp': '2025-09-30 22:16:51.851201', 'step': 3964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:51.881728', 'step': 3964, 'epoch': 1} {'type': 'loss', 'content': 0.14759670197963715, 'timestamp': '2025-09-30 22:16:51.886745', 'step': 3965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:51.918637', 'step': 3965, 'epoch': 1} {'type': 'loss', 'content': 0.22471925616264343, 'timestamp': '2025-09-30 22:16:51.921794', 'step': 3966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:51.953059', 'step': 3966, 'epoch': 1} {'type': 'loss', 'content': 0.09774748235940933, 'timestamp': '2025-09-30 22:16:51.960100', 'step': 3967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:51.990662', 'step': 3967, 'epoch': 1} {'type': 'loss', 'content': 0.1483420580625534, 'timestamp': '2025-09-30 22:16:52.019189', 'step': 3968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.050171', 'step': 3968, 'epoch': 1} {'type': 'loss', 'content': 0.2848593294620514, 'timestamp': '2025-09-30 22:16:52.052426', 'step': 3969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:52.086338', 'step': 3969, 'epoch': 1} {'type': 'loss', 'content': 0.19463925063610077, 'timestamp': '2025-09-30 22:16:52.090620', 'step': 3970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:16:52.123646', 'step': 3970, 'epoch': 1} {'type': 'loss', 'content': 0.11904825270175934, 'timestamp': '2025-09-30 22:16:52.126836', 'step': 3971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:52.163602', 'step': 3971, 'epoch': 1} {'type': 'loss', 'content': 0.17388670146465302, 'timestamp': '2025-09-30 22:16:52.192915', 'step': 3972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:52.224902', 'step': 3972, 'epoch': 1} {'type': 'loss', 'content': 0.2048100382089615, 'timestamp': '2025-09-30 22:16:52.227599', 'step': 3973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.261396', 'step': 3973, 'epoch': 1} {'type': 'loss', 'content': 0.1697022020816803, 'timestamp': '2025-09-30 22:16:52.264275', 'step': 3974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:52.296060', 'step': 3974, 'epoch': 1} {'type': 'loss', 'content': 0.21558624505996704, 'timestamp': '2025-09-30 22:16:52.298657', 'step': 3975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:52.335670', 'step': 3975, 'epoch': 1} {'type': 'loss', 'content': 0.13975706696510315, 'timestamp': '2025-09-30 22:16:52.361817', 'step': 3976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:52.394567', 'step': 3976, 'epoch': 1} {'type': 'loss', 'content': 0.1697627156972885, 'timestamp': '2025-09-30 22:16:52.397747', 'step': 3977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:52.430096', 'step': 3977, 'epoch': 1} {'type': 'loss', 'content': 0.14567963778972626, 'timestamp': '2025-09-30 22:16:52.433907', 'step': 3978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:52.465910', 'step': 3978, 'epoch': 1} {'type': 'loss', 'content': 0.17311520874500275, 'timestamp': '2025-09-30 22:16:52.471535', 'step': 3979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:52.508507', 'step': 3979, 'epoch': 1} {'type': 'loss', 'content': 0.19400888681411743, 'timestamp': '2025-09-30 22:16:52.535482', 'step': 3980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:52.568033', 'step': 3980, 'epoch': 1} {'type': 'loss', 'content': 0.0953618511557579, 'timestamp': '2025-09-30 22:16:52.570171', 'step': 3981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:16:52.606751', 'step': 3981, 'epoch': 1} {'type': 'loss', 'content': 0.16019387543201447, 'timestamp': '2025-09-30 22:16:52.611486', 'step': 3982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.642721', 'step': 3982, 'epoch': 1} {'type': 'loss', 'content': 0.131498321890831, 'timestamp': '2025-09-30 22:16:52.644806', 'step': 3983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.675733', 'step': 3983, 'epoch': 1} {'type': 'loss', 'content': 0.0938643366098404, 'timestamp': '2025-09-30 22:16:52.702865', 'step': 3984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.733829', 'step': 3984, 'epoch': 1} {'type': 'loss', 'content': 0.14350645244121552, 'timestamp': '2025-09-30 22:16:52.736963', 'step': 3985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:52.771283', 'step': 3985, 'epoch': 1} {'type': 'loss', 'content': 0.1342931091785431, 'timestamp': '2025-09-30 22:16:52.776007', 'step': 3986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.810275', 'step': 3986, 'epoch': 1} {'type': 'loss', 'content': 0.11791997402906418, 'timestamp': '2025-09-30 22:16:52.814894', 'step': 3987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:52.848034', 'step': 3987, 'epoch': 1} {'type': 'loss', 'content': 0.09537812322378159, 'timestamp': '2025-09-30 22:16:52.874530', 'step': 3988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:52.908919', 'step': 3988, 'epoch': 1} {'type': 'loss', 'content': 0.1624385118484497, 'timestamp': '2025-09-30 22:16:52.911272', 'step': 3989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:16:52.941978', 'step': 3989, 'epoch': 1} {'type': 'loss', 'content': 0.13562153279781342, 'timestamp': '2025-09-30 22:16:52.946233', 'step': 3990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:52.978821', 'step': 3990, 'epoch': 1} {'type': 'loss', 'content': 0.18183599412441254, 'timestamp': '2025-09-30 22:16:52.986357', 'step': 3991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:53.019006', 'step': 3991, 'epoch': 1} {'type': 'loss', 'content': 0.1356351524591446, 'timestamp': '2025-09-30 22:16:53.043264', 'step': 3992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:53.073830', 'step': 3992, 'epoch': 1} {'type': 'loss', 'content': 0.10042400658130646, 'timestamp': '2025-09-30 22:16:53.085292', 'step': 3993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:53.116219', 'step': 3993, 'epoch': 1} {'type': 'loss', 'content': 0.1487281769514084, 'timestamp': '2025-09-30 22:16:53.122807', 'step': 3994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:53.157391', 'step': 3994, 'epoch': 1} {'type': 'loss', 'content': 0.13387668132781982, 'timestamp': '2025-09-30 22:16:53.163909', 'step': 3995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:53.194725', 'step': 3995, 'epoch': 1} {'type': 'loss', 'content': 0.19109563529491425, 'timestamp': '2025-09-30 22:16:53.223198', 'step': 3996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:53.256631', 'step': 3996, 'epoch': 1} {'type': 'loss', 'content': 0.1334294229745865, 'timestamp': '2025-09-30 22:16:53.263787', 'step': 3997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:53.298882', 'step': 3997, 'epoch': 1} {'type': 'loss', 'content': 0.2235557734966278, 'timestamp': '2025-09-30 22:16:53.303728', 'step': 3998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:53.338787', 'step': 3998, 'epoch': 1} {'type': 'loss', 'content': 0.13388629257678986, 'timestamp': '2025-09-30 22:16:53.356261', 'step': 3999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:53.389980', 'step': 3999, 'epoch': 1} {'type': 'loss', 'content': 0.21474571526050568, 'timestamp': '2025-09-30 22:16:53.417423', 'step': 4000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4000', 'timestamp': '2025-09-30 22:16:58.747241', 'step': 4000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:58.796127', 'step': 4000, 'epoch': 1} {'type': 'loss', 'content': 0.15595172345638275, 'timestamp': '2025-09-30 22:16:58.808189', 'step': 4001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:58.842156', 'step': 4001, 'epoch': 1} {'type': 'loss', 'content': 0.10904968529939651, 'timestamp': '2025-09-30 22:16:58.853458', 'step': 4002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:58.888951', 'step': 4002, 'epoch': 1} {'type': 'loss', 'content': 0.13525889813899994, 'timestamp': '2025-09-30 22:16:58.897206', 'step': 4003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:58.933938', 'step': 4003, 'epoch': 1} {'type': 'loss', 'content': 0.19986996054649353, 'timestamp': '2025-09-30 22:16:58.958673', 'step': 4004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:58.989425', 'step': 4004, 'epoch': 1} {'type': 'loss', 'content': 0.20747342705726624, 'timestamp': '2025-09-30 22:16:58.992132', 'step': 4005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:59.023481', 'step': 4005, 'epoch': 1} {'type': 'loss', 'content': 0.20905140042304993, 'timestamp': '2025-09-30 22:16:59.034005', 'step': 4006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:59.065623', 'step': 4006, 'epoch': 1} {'type': 'loss', 'content': 0.16331112384796143, 'timestamp': '2025-09-30 22:16:59.069587', 'step': 4007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:59.101028', 'step': 4007, 'epoch': 1} {'type': 'loss', 'content': 0.1390814483165741, 'timestamp': '2025-09-30 22:16:59.127657', 'step': 4008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:59.165343', 'step': 4008, 'epoch': 1} {'type': 'loss', 'content': 0.23122979700565338, 'timestamp': '2025-09-30 22:16:59.168429', 'step': 4009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:59.199095', 'step': 4009, 'epoch': 1} {'type': 'loss', 'content': 0.11646593362092972, 'timestamp': '2025-09-30 22:16:59.201720', 'step': 4010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:16:59.239062', 'step': 4010, 'epoch': 1} {'type': 'loss', 'content': 0.22913198173046112, 'timestamp': '2025-09-30 22:16:59.248123', 'step': 4011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:59.279756', 'step': 4011, 'epoch': 1} {'type': 'loss', 'content': 0.10169697552919388, 'timestamp': '2025-09-30 22:16:59.304742', 'step': 4012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:59.336820', 'step': 4012, 'epoch': 1} {'type': 'loss', 'content': 0.2587818205356598, 'timestamp': '2025-09-30 22:16:59.344942', 'step': 4013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:59.381323', 'step': 4013, 'epoch': 1} {'type': 'loss', 'content': 0.13875947892665863, 'timestamp': '2025-09-30 22:16:59.384810', 'step': 4014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:59.416520', 'step': 4014, 'epoch': 1} {'type': 'loss', 'content': 0.14624866843223572, 'timestamp': '2025-09-30 22:16:59.424956', 'step': 4015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:16:59.471090', 'step': 4015, 'epoch': 1} {'type': 'loss', 'content': 0.19595739245414734, 'timestamp': '2025-09-30 22:16:59.496338', 'step': 4016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:59.529346', 'step': 4016, 'epoch': 1} {'type': 'loss', 'content': 0.09807652980089188, 'timestamp': '2025-09-30 22:16:59.538939', 'step': 4017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:59.569862', 'step': 4017, 'epoch': 1} {'type': 'loss', 'content': 0.14845584332942963, 'timestamp': '2025-09-30 22:16:59.578785', 'step': 4018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:59.616471', 'step': 4018, 'epoch': 1} {'type': 'loss', 'content': 0.17054378986358643, 'timestamp': '2025-09-30 22:16:59.624019', 'step': 4019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:59.663037', 'step': 4019, 'epoch': 1} {'type': 'loss', 'content': 0.10351473838090897, 'timestamp': '2025-09-30 22:16:59.692276', 'step': 4020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:59.723928', 'step': 4020, 'epoch': 1} {'type': 'loss', 'content': 0.1360418051481247, 'timestamp': '2025-09-30 22:16:59.733583', 'step': 4021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:59.772528', 'step': 4021, 'epoch': 1} {'type': 'loss', 'content': 0.31461113691329956, 'timestamp': '2025-09-30 22:16:59.775918', 'step': 4022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:59.813947', 'step': 4022, 'epoch': 1} {'type': 'loss', 'content': 0.12832970917224884, 'timestamp': '2025-09-30 22:16:59.816854', 'step': 4023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:16:59.848600', 'step': 4023, 'epoch': 1} {'type': 'loss', 'content': 0.19572703540325165, 'timestamp': '2025-09-30 22:16:59.873543', 'step': 4024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:16:59.910323', 'step': 4024, 'epoch': 1} {'type': 'loss', 'content': 0.11290736496448517, 'timestamp': '2025-09-30 22:16:59.913459', 'step': 4025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:16:59.958786', 'step': 4025, 'epoch': 1} {'type': 'loss', 'content': 0.1404457539319992, 'timestamp': '2025-09-30 22:16:59.968350', 'step': 4026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.008995', 'step': 4026, 'epoch': 1} {'type': 'loss', 'content': 0.16573339700698853, 'timestamp': '2025-09-30 22:17:00.016681', 'step': 4027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:00.047742', 'step': 4027, 'epoch': 1} {'type': 'loss', 'content': 0.14908766746520996, 'timestamp': '2025-09-30 22:17:00.072829', 'step': 4028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.104920', 'step': 4028, 'epoch': 1} {'type': 'loss', 'content': 0.19923856854438782, 'timestamp': '2025-09-30 22:17:00.115447', 'step': 4029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:00.154529', 'step': 4029, 'epoch': 1} {'type': 'loss', 'content': 0.13850226998329163, 'timestamp': '2025-09-30 22:17:00.163894', 'step': 4030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:00.202041', 'step': 4030, 'epoch': 1} {'type': 'loss', 'content': 0.1738513708114624, 'timestamp': '2025-09-30 22:17:00.209300', 'step': 4031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:00.245708', 'step': 4031, 'epoch': 1} {'type': 'loss', 'content': 0.17385922372341156, 'timestamp': '2025-09-30 22:17:00.276502', 'step': 4032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:00.312250', 'step': 4032, 'epoch': 1} {'type': 'loss', 'content': 0.17673125863075256, 'timestamp': '2025-09-30 22:17:00.315173', 'step': 4033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:00.346675', 'step': 4033, 'epoch': 1} {'type': 'loss', 'content': 0.18628236651420593, 'timestamp': '2025-09-30 22:17:00.353649', 'step': 4034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.386312', 'step': 4034, 'epoch': 1} {'type': 'loss', 'content': 0.17553982138633728, 'timestamp': '2025-09-30 22:17:00.393822', 'step': 4035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:00.430096', 'step': 4035, 'epoch': 1} {'type': 'loss', 'content': 0.240014910697937, 'timestamp': '2025-09-30 22:17:00.458513', 'step': 4036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.489614', 'step': 4036, 'epoch': 1} {'type': 'loss', 'content': 0.18089815974235535, 'timestamp': '2025-09-30 22:17:00.498660', 'step': 4037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.534524', 'step': 4037, 'epoch': 1} {'type': 'loss', 'content': 0.16694313287734985, 'timestamp': '2025-09-30 22:17:00.539371', 'step': 4038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.571995', 'step': 4038, 'epoch': 1} {'type': 'loss', 'content': 0.15079514682292938, 'timestamp': '2025-09-30 22:17:00.580562', 'step': 4039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:00.626429', 'step': 4039, 'epoch': 1} {'type': 'loss', 'content': 0.09291143715381622, 'timestamp': '2025-09-30 22:17:00.651100', 'step': 4040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.683277', 'step': 4040, 'epoch': 1} {'type': 'loss', 'content': 0.16354483366012573, 'timestamp': '2025-09-30 22:17:00.689294', 'step': 4041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.725175', 'step': 4041, 'epoch': 1} {'type': 'loss', 'content': 0.1337593048810959, 'timestamp': '2025-09-30 22:17:00.728119', 'step': 4042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.763045', 'step': 4042, 'epoch': 1} {'type': 'loss', 'content': 0.20131368935108185, 'timestamp': '2025-09-30 22:17:00.774514', 'step': 4043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:00.812780', 'step': 4043, 'epoch': 1} {'type': 'loss', 'content': 0.15437088906764984, 'timestamp': '2025-09-30 22:17:00.837643', 'step': 4044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:00.869160', 'step': 4044, 'epoch': 1} {'type': 'loss', 'content': 0.14762911200523376, 'timestamp': '2025-09-30 22:17:00.879651', 'step': 4045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:00.917650', 'step': 4045, 'epoch': 1} {'type': 'loss', 'content': 0.11317764967679977, 'timestamp': '2025-09-30 22:17:00.923763', 'step': 4046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:00.961156', 'step': 4046, 'epoch': 1} {'type': 'loss', 'content': 0.12853959202766418, 'timestamp': '2025-09-30 22:17:00.968845', 'step': 4047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:01.003111', 'step': 4047, 'epoch': 1} {'type': 'loss', 'content': 0.2241990864276886, 'timestamp': '2025-09-30 22:17:01.036598', 'step': 4048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.070275', 'step': 4048, 'epoch': 1} {'type': 'loss', 'content': 0.12656305730342865, 'timestamp': '2025-09-30 22:17:01.073503', 'step': 4049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.105837', 'step': 4049, 'epoch': 1} {'type': 'loss', 'content': 0.20225657522678375, 'timestamp': '2025-09-30 22:17:01.114164', 'step': 4050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.148747', 'step': 4050, 'epoch': 1} {'type': 'loss', 'content': 0.15265057981014252, 'timestamp': '2025-09-30 22:17:01.152144', 'step': 4051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:01.189087', 'step': 4051, 'epoch': 1} {'type': 'loss', 'content': 0.17852701246738434, 'timestamp': '2025-09-30 22:17:01.213649', 'step': 4052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:01.251498', 'step': 4052, 'epoch': 1} {'type': 'loss', 'content': 0.07748014479875565, 'timestamp': '2025-09-30 22:17:01.258454', 'step': 4053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.295537', 'step': 4053, 'epoch': 1} {'type': 'loss', 'content': 0.23494350910186768, 'timestamp': '2025-09-30 22:17:01.304001', 'step': 4054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:01.341334', 'step': 4054, 'epoch': 1} {'type': 'loss', 'content': 0.15165556967258453, 'timestamp': '2025-09-30 22:17:01.344342', 'step': 4055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.382034', 'step': 4055, 'epoch': 1} {'type': 'loss', 'content': 0.1631193608045578, 'timestamp': '2025-09-30 22:17:01.412018', 'step': 4056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.451272', 'step': 4056, 'epoch': 1} {'type': 'loss', 'content': 0.21634213626384735, 'timestamp': '2025-09-30 22:17:01.462539', 'step': 4057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.499755', 'step': 4057, 'epoch': 1} {'type': 'loss', 'content': 0.2042238712310791, 'timestamp': '2025-09-30 22:17:01.503086', 'step': 4058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:01.534793', 'step': 4058, 'epoch': 1} {'type': 'loss', 'content': 0.18794800341129303, 'timestamp': '2025-09-30 22:17:01.539284', 'step': 4059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.576941', 'step': 4059, 'epoch': 1} {'type': 'loss', 'content': 0.18657612800598145, 'timestamp': '2025-09-30 22:17:01.608200', 'step': 4060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:01.645035', 'step': 4060, 'epoch': 1} {'type': 'loss', 'content': 0.2321503460407257, 'timestamp': '2025-09-30 22:17:01.655883', 'step': 4061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:01.694543', 'step': 4061, 'epoch': 1} {'type': 'loss', 'content': 0.1677546203136444, 'timestamp': '2025-09-30 22:17:01.698954', 'step': 4062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.730230', 'step': 4062, 'epoch': 1} {'type': 'loss', 'content': 0.18835760653018951, 'timestamp': '2025-09-30 22:17:01.734856', 'step': 4063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:01.765928', 'step': 4063, 'epoch': 1} {'type': 'loss', 'content': 0.22670026123523712, 'timestamp': '2025-09-30 22:17:01.800113', 'step': 4064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.838802', 'step': 4064, 'epoch': 1} {'type': 'loss', 'content': 0.14464111626148224, 'timestamp': '2025-09-30 22:17:01.847744', 'step': 4065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.889333', 'step': 4065, 'epoch': 1} {'type': 'loss', 'content': 0.16755738854408264, 'timestamp': '2025-09-30 22:17:01.892701', 'step': 4066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:01.923157', 'step': 4066, 'epoch': 1} {'type': 'loss', 'content': 0.17854657769203186, 'timestamp': '2025-09-30 22:17:01.926757', 'step': 4067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:01.960993', 'step': 4067, 'epoch': 1} {'type': 'loss', 'content': 0.10698381066322327, 'timestamp': '2025-09-30 22:17:01.988526', 'step': 4068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:02.019584', 'step': 4068, 'epoch': 1} {'type': 'loss', 'content': 0.1802416890859604, 'timestamp': '2025-09-30 22:17:02.028674', 'step': 4069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:02.071620', 'step': 4069, 'epoch': 1} {'type': 'loss', 'content': 0.13519741594791412, 'timestamp': '2025-09-30 22:17:02.075217', 'step': 4070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.106543', 'step': 4070, 'epoch': 1} {'type': 'loss', 'content': 0.11265019327402115, 'timestamp': '2025-09-30 22:17:02.117135', 'step': 4071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.158870', 'step': 4071, 'epoch': 1} {'type': 'loss', 'content': 0.14559903740882874, 'timestamp': '2025-09-30 22:17:02.191868', 'step': 4072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.223832', 'step': 4072, 'epoch': 1} {'type': 'loss', 'content': 0.24462728202342987, 'timestamp': '2025-09-30 22:17:02.229739', 'step': 4073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:02.262988', 'step': 4073, 'epoch': 1} {'type': 'loss', 'content': 0.08438751101493835, 'timestamp': '2025-09-30 22:17:02.267548', 'step': 4074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:02.300268', 'step': 4074, 'epoch': 1} {'type': 'loss', 'content': 0.1833672970533371, 'timestamp': '2025-09-30 22:17:02.303243', 'step': 4075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:02.340086', 'step': 4075, 'epoch': 1} {'type': 'loss', 'content': 0.2045757919549942, 'timestamp': '2025-09-30 22:17:02.372271', 'step': 4076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:02.402889', 'step': 4076, 'epoch': 1} {'type': 'loss', 'content': 0.1769261509180069, 'timestamp': '2025-09-30 22:17:02.411524', 'step': 4077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.442515', 'step': 4077, 'epoch': 1} {'type': 'loss', 'content': 0.14272968471050262, 'timestamp': '2025-09-30 22:17:02.445937', 'step': 4078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.476983', 'step': 4078, 'epoch': 1} {'type': 'loss', 'content': 0.11511105298995972, 'timestamp': '2025-09-30 22:17:02.486029', 'step': 4079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.517928', 'step': 4079, 'epoch': 1} {'type': 'loss', 'content': 0.26649048924446106, 'timestamp': '2025-09-30 22:17:02.549089', 'step': 4080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.585231', 'step': 4080, 'epoch': 1} {'type': 'loss', 'content': 0.16067199409008026, 'timestamp': '2025-09-30 22:17:02.594597', 'step': 4081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.631167', 'step': 4081, 'epoch': 1} {'type': 'loss', 'content': 0.19813692569732666, 'timestamp': '2025-09-30 22:17:02.634010', 'step': 4082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.665896', 'step': 4082, 'epoch': 1} {'type': 'loss', 'content': 0.1094609797000885, 'timestamp': '2025-09-30 22:17:02.669241', 'step': 4083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:02.709208', 'step': 4083, 'epoch': 1} {'type': 'loss', 'content': 0.17706537246704102, 'timestamp': '2025-09-30 22:17:02.738702', 'step': 4084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:02.772966', 'step': 4084, 'epoch': 1} {'type': 'loss', 'content': 0.1837097555398941, 'timestamp': '2025-09-30 22:17:02.780707', 'step': 4085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.819692', 'step': 4085, 'epoch': 1} {'type': 'loss', 'content': 0.14434389770030975, 'timestamp': '2025-09-30 22:17:02.833215', 'step': 4086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:02.865724', 'step': 4086, 'epoch': 1} {'type': 'loss', 'content': 0.1711457073688507, 'timestamp': '2025-09-30 22:17:02.869155', 'step': 4087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:02.903641', 'step': 4087, 'epoch': 1} {'type': 'loss', 'content': 0.18788807094097137, 'timestamp': '2025-09-30 22:17:02.929987', 'step': 4088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:02.963314', 'step': 4088, 'epoch': 1} {'type': 'loss', 'content': 0.09425384551286697, 'timestamp': '2025-09-30 22:17:02.973681', 'step': 4089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.005302', 'step': 4089, 'epoch': 1} {'type': 'loss', 'content': 0.061792150139808655, 'timestamp': '2025-09-30 22:17:03.008234', 'step': 4090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.040680', 'step': 4090, 'epoch': 1} {'type': 'loss', 'content': 0.137788325548172, 'timestamp': '2025-09-30 22:17:03.044149', 'step': 4091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:03.096025', 'step': 4091, 'epoch': 1} {'type': 'loss', 'content': 0.1236225962638855, 'timestamp': '2025-09-30 22:17:03.120350', 'step': 4092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.151951', 'step': 4092, 'epoch': 1} {'type': 'loss', 'content': 0.18765436112880707, 'timestamp': '2025-09-30 22:17:03.159786', 'step': 4093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.195925', 'step': 4093, 'epoch': 1} {'type': 'loss', 'content': 0.10479019582271576, 'timestamp': '2025-09-30 22:17:03.198631', 'step': 4094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.230225', 'step': 4094, 'epoch': 1} {'type': 'loss', 'content': 0.14817219972610474, 'timestamp': '2025-09-30 22:17:03.232479', 'step': 4095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.269875', 'step': 4095, 'epoch': 1} {'type': 'loss', 'content': 0.1917908787727356, 'timestamp': '2025-09-30 22:17:03.300393', 'step': 4096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.331134', 'step': 4096, 'epoch': 1} {'type': 'loss', 'content': 0.12283588945865631, 'timestamp': '2025-09-30 22:17:03.333992', 'step': 4097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:03.372009', 'step': 4097, 'epoch': 1} {'type': 'loss', 'content': 0.15519879758358002, 'timestamp': '2025-09-30 22:17:03.375375', 'step': 4098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.408386', 'step': 4098, 'epoch': 1} {'type': 'loss', 'content': 0.2180631458759308, 'timestamp': '2025-09-30 22:17:03.411571', 'step': 4099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.443999', 'step': 4099, 'epoch': 1} {'type': 'loss', 'content': 0.16715191304683685, 'timestamp': '2025-09-30 22:17:03.476428', 'step': 4100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.516136', 'step': 4100, 'epoch': 1} {'type': 'loss', 'content': 0.16607320308685303, 'timestamp': '2025-09-30 22:17:03.519269', 'step': 4101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:03.552142', 'step': 4101, 'epoch': 1} {'type': 'loss', 'content': 0.15309403836727142, 'timestamp': '2025-09-30 22:17:03.562783', 'step': 4102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.598040', 'step': 4102, 'epoch': 1} {'type': 'loss', 'content': 0.20336607098579407, 'timestamp': '2025-09-30 22:17:03.608165', 'step': 4103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.646064', 'step': 4103, 'epoch': 1} {'type': 'loss', 'content': 0.10579440742731094, 'timestamp': '2025-09-30 22:17:03.677244', 'step': 4104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:03.709166', 'step': 4104, 'epoch': 1} {'type': 'loss', 'content': 0.09428031742572784, 'timestamp': '2025-09-30 22:17:03.714380', 'step': 4105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:03.746791', 'step': 4105, 'epoch': 1} {'type': 'loss', 'content': 0.1745559275150299, 'timestamp': '2025-09-30 22:17:03.750671', 'step': 4106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.797345', 'step': 4106, 'epoch': 1} {'type': 'loss', 'content': 0.19175264239311218, 'timestamp': '2025-09-30 22:17:03.816365', 'step': 4107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:03.848963', 'step': 4107, 'epoch': 1} {'type': 'loss', 'content': 0.1677323877811432, 'timestamp': '2025-09-30 22:17:03.875515', 'step': 4108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:03.907450', 'step': 4108, 'epoch': 1} {'type': 'loss', 'content': 0.1706238090991974, 'timestamp': '2025-09-30 22:17:03.911330', 'step': 4109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.945126', 'step': 4109, 'epoch': 1} {'type': 'loss', 'content': 0.13334707915782928, 'timestamp': '2025-09-30 22:17:03.948530', 'step': 4110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:03.984480', 'step': 4110, 'epoch': 1} {'type': 'loss', 'content': 0.26925039291381836, 'timestamp': '2025-09-30 22:17:03.989290', 'step': 4111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:04.023251', 'step': 4111, 'epoch': 1} {'type': 'loss', 'content': 0.11091826856136322, 'timestamp': '2025-09-30 22:17:04.048826', 'step': 4112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.083002', 'step': 4112, 'epoch': 1} {'type': 'loss', 'content': 0.13836050033569336, 'timestamp': '2025-09-30 22:17:04.085939', 'step': 4113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:04.118849', 'step': 4113, 'epoch': 1} {'type': 'loss', 'content': 0.15000618994235992, 'timestamp': '2025-09-30 22:17:04.121849', 'step': 4114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:04.157337', 'step': 4114, 'epoch': 1} {'type': 'loss', 'content': 0.1301901638507843, 'timestamp': '2025-09-30 22:17:04.160320', 'step': 4115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:04.190216', 'step': 4115, 'epoch': 1} {'type': 'loss', 'content': 0.09773126244544983, 'timestamp': '2025-09-30 22:17:04.214817', 'step': 4116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:04.249622', 'step': 4116, 'epoch': 1} {'type': 'loss', 'content': 0.1787506341934204, 'timestamp': '2025-09-30 22:17:04.254614', 'step': 4117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:04.286554', 'step': 4117, 'epoch': 1} {'type': 'loss', 'content': 0.1059180200099945, 'timestamp': '2025-09-30 22:17:04.288987', 'step': 4118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.319656', 'step': 4118, 'epoch': 1} {'type': 'loss', 'content': 0.18228574097156525, 'timestamp': '2025-09-30 22:17:04.322519', 'step': 4119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.352701', 'step': 4119, 'epoch': 1} {'type': 'loss', 'content': 0.14731523394584656, 'timestamp': '2025-09-30 22:17:04.379882', 'step': 4120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.410940', 'step': 4120, 'epoch': 1} {'type': 'loss', 'content': 0.19482064247131348, 'timestamp': '2025-09-30 22:17:04.413238', 'step': 4121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:04.443163', 'step': 4121, 'epoch': 1} {'type': 'loss', 'content': 0.13778263330459595, 'timestamp': '2025-09-30 22:17:04.445929', 'step': 4122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.477099', 'step': 4122, 'epoch': 1} {'type': 'loss', 'content': 0.20987112820148468, 'timestamp': '2025-09-30 22:17:04.480972', 'step': 4123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.511876', 'step': 4123, 'epoch': 1} {'type': 'loss', 'content': 0.1965550035238266, 'timestamp': '2025-09-30 22:17:04.536070', 'step': 4124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:04.566134', 'step': 4124, 'epoch': 1} {'type': 'loss', 'content': 0.18598739802837372, 'timestamp': '2025-09-30 22:17:04.569010', 'step': 4125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.600733', 'step': 4125, 'epoch': 1} {'type': 'loss', 'content': 0.20608267188072205, 'timestamp': '2025-09-30 22:17:04.604411', 'step': 4126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:04.636170', 'step': 4126, 'epoch': 1} {'type': 'loss', 'content': 0.1048157662153244, 'timestamp': '2025-09-30 22:17:04.639462', 'step': 4127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:04.670800', 'step': 4127, 'epoch': 1} {'type': 'loss', 'content': 0.1281006634235382, 'timestamp': '2025-09-30 22:17:04.696779', 'step': 4128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:04.727278', 'step': 4128, 'epoch': 1} {'type': 'loss', 'content': 0.07849527895450592, 'timestamp': '2025-09-30 22:17:04.729230', 'step': 4129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:04.760275', 'step': 4129, 'epoch': 1} {'type': 'loss', 'content': 0.1859494000673294, 'timestamp': '2025-09-30 22:17:04.764963', 'step': 4130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.797488', 'step': 4130, 'epoch': 1} {'type': 'loss', 'content': 0.17880462110042572, 'timestamp': '2025-09-30 22:17:04.801228', 'step': 4131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:04.832475', 'step': 4131, 'epoch': 1} {'type': 'loss', 'content': 0.1266068071126938, 'timestamp': '2025-09-30 22:17:04.858040', 'step': 4132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:04.897096', 'step': 4132, 'epoch': 1} {'type': 'loss', 'content': 0.22816841304302216, 'timestamp': '2025-09-30 22:17:04.899801', 'step': 4133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:04.933060', 'step': 4133, 'epoch': 1} {'type': 'loss', 'content': 0.16894470155239105, 'timestamp': '2025-09-30 22:17:04.938250', 'step': 4134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:04.969071', 'step': 4134, 'epoch': 1} {'type': 'loss', 'content': 0.17764268815517426, 'timestamp': '2025-09-30 22:17:04.978737', 'step': 4135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.016298', 'step': 4135, 'epoch': 1} {'type': 'loss', 'content': 0.204424187541008, 'timestamp': '2025-09-30 22:17:05.047181', 'step': 4136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.076935', 'step': 4136, 'epoch': 1} {'type': 'loss', 'content': 0.09929075092077255, 'timestamp': '2025-09-30 22:17:05.080053', 'step': 4137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:05.120554', 'step': 4137, 'epoch': 1} {'type': 'loss', 'content': 0.19077856838703156, 'timestamp': '2025-09-30 22:17:05.125607', 'step': 4138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:05.157024', 'step': 4138, 'epoch': 1} {'type': 'loss', 'content': 0.1895824670791626, 'timestamp': '2025-09-30 22:17:05.159771', 'step': 4139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.190478', 'step': 4139, 'epoch': 1} {'type': 'loss', 'content': 0.11154119670391083, 'timestamp': '2025-09-30 22:17:05.215411', 'step': 4140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:05.245677', 'step': 4140, 'epoch': 1} {'type': 'loss', 'content': 0.1266224980354309, 'timestamp': '2025-09-30 22:17:05.248106', 'step': 4141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.284165', 'step': 4141, 'epoch': 1} {'type': 'loss', 'content': 0.08344030380249023, 'timestamp': '2025-09-30 22:17:05.288273', 'step': 4142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:05.319121', 'step': 4142, 'epoch': 1} {'type': 'loss', 'content': 0.16461215913295746, 'timestamp': '2025-09-30 22:17:05.330212', 'step': 4143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.363272', 'step': 4143, 'epoch': 1} {'type': 'loss', 'content': 0.1609775573015213, 'timestamp': '2025-09-30 22:17:05.395366', 'step': 4144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:05.426121', 'step': 4144, 'epoch': 1} {'type': 'loss', 'content': 0.16163690388202667, 'timestamp': '2025-09-30 22:17:05.435656', 'step': 4145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.471707', 'step': 4145, 'epoch': 1} {'type': 'loss', 'content': 0.19226619601249695, 'timestamp': '2025-09-30 22:17:05.476133', 'step': 4146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:05.508187', 'step': 4146, 'epoch': 1} {'type': 'loss', 'content': 0.198917418718338, 'timestamp': '2025-09-30 22:17:05.511429', 'step': 4147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.548612', 'step': 4147, 'epoch': 1} {'type': 'loss', 'content': 0.13976387679576874, 'timestamp': '2025-09-30 22:17:05.574055', 'step': 4148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:05.614219', 'step': 4148, 'epoch': 1} {'type': 'loss', 'content': 0.09620852023363113, 'timestamp': '2025-09-30 22:17:05.617348', 'step': 4149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:05.648592', 'step': 4149, 'epoch': 1} {'type': 'loss', 'content': 0.1694536954164505, 'timestamp': '2025-09-30 22:17:05.657493', 'step': 4150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:05.697070', 'step': 4150, 'epoch': 1} {'type': 'loss', 'content': 0.17954938113689423, 'timestamp': '2025-09-30 22:17:05.707401', 'step': 4151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:05.744636', 'step': 4151, 'epoch': 1} {'type': 'loss', 'content': 0.08525197207927704, 'timestamp': '2025-09-30 22:17:05.775234', 'step': 4152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:05.805301', 'step': 4152, 'epoch': 1} {'type': 'loss', 'content': 0.1729416698217392, 'timestamp': '2025-09-30 22:17:05.808946', 'step': 4153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:05.847826', 'step': 4153, 'epoch': 1} {'type': 'loss', 'content': 0.18933427333831787, 'timestamp': '2025-09-30 22:17:05.851974', 'step': 4154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.884185', 'step': 4154, 'epoch': 1} {'type': 'loss', 'content': 0.061363544315099716, 'timestamp': '2025-09-30 22:17:05.888236', 'step': 4155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:05.918763', 'step': 4155, 'epoch': 1} {'type': 'loss', 'content': 0.17852972447872162, 'timestamp': '2025-09-30 22:17:05.949827', 'step': 4156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:05.981002', 'step': 4156, 'epoch': 1} {'type': 'loss', 'content': 0.20590198040008545, 'timestamp': '2025-09-30 22:17:05.990227', 'step': 4157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:06.029274', 'step': 4157, 'epoch': 1} {'type': 'loss', 'content': 0.13790278136730194, 'timestamp': '2025-09-30 22:17:06.032660', 'step': 4158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.064476', 'step': 4158, 'epoch': 1} {'type': 'loss', 'content': 0.20066948235034943, 'timestamp': '2025-09-30 22:17:06.074438', 'step': 4159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.121261', 'step': 4159, 'epoch': 1} {'type': 'loss', 'content': 0.2078082412481308, 'timestamp': '2025-09-30 22:17:06.152986', 'step': 4160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.185287', 'step': 4160, 'epoch': 1} {'type': 'loss', 'content': 0.12402070313692093, 'timestamp': '2025-09-30 22:17:06.189575', 'step': 4161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:06.229776', 'step': 4161, 'epoch': 1} {'type': 'loss', 'content': 0.1948557198047638, 'timestamp': '2025-09-30 22:17:06.240278', 'step': 4162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.288629', 'step': 4162, 'epoch': 1} {'type': 'loss', 'content': 0.16511811316013336, 'timestamp': '2025-09-30 22:17:06.292246', 'step': 4163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:06.337463', 'step': 4163, 'epoch': 1} {'type': 'loss', 'content': 0.08892005681991577, 'timestamp': '2025-09-30 22:17:06.370378', 'step': 4164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.410507', 'step': 4164, 'epoch': 1} {'type': 'loss', 'content': 0.14886018633842468, 'timestamp': '2025-09-30 22:17:06.413423', 'step': 4165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.444923', 'step': 4165, 'epoch': 1} {'type': 'loss', 'content': 0.16907231509685516, 'timestamp': '2025-09-30 22:17:06.448239', 'step': 4166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.479385', 'step': 4166, 'epoch': 1} {'type': 'loss', 'content': 0.13824759423732758, 'timestamp': '2025-09-30 22:17:06.482711', 'step': 4167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.520240', 'step': 4167, 'epoch': 1} {'type': 'loss', 'content': 0.1036684438586235, 'timestamp': '2025-09-30 22:17:06.544976', 'step': 4168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:06.586344', 'step': 4168, 'epoch': 1} {'type': 'loss', 'content': 0.1072545126080513, 'timestamp': '2025-09-30 22:17:06.590552', 'step': 4169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:06.626038', 'step': 4169, 'epoch': 1} {'type': 'loss', 'content': 0.10866475850343704, 'timestamp': '2025-09-30 22:17:06.635848', 'step': 4170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:06.668992', 'step': 4170, 'epoch': 1} {'type': 'loss', 'content': 0.12059911340475082, 'timestamp': '2025-09-30 22:17:06.679129', 'step': 4171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:06.719729', 'step': 4171, 'epoch': 1} {'type': 'loss', 'content': 0.1308160126209259, 'timestamp': '2025-09-30 22:17:06.751376', 'step': 4172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:06.782505', 'step': 4172, 'epoch': 1} {'type': 'loss', 'content': 0.1568930447101593, 'timestamp': '2025-09-30 22:17:06.786885', 'step': 4173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:06.825068', 'step': 4173, 'epoch': 1} {'type': 'loss', 'content': 0.11406956613063812, 'timestamp': '2025-09-30 22:17:06.836497', 'step': 4174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.876131', 'step': 4174, 'epoch': 1} {'type': 'loss', 'content': 0.09283296763896942, 'timestamp': '2025-09-30 22:17:06.880446', 'step': 4175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:06.916792', 'step': 4175, 'epoch': 1} {'type': 'loss', 'content': 0.09877999126911163, 'timestamp': '2025-09-30 22:17:06.950029', 'step': 4176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:06.982108', 'step': 4176, 'epoch': 1} {'type': 'loss', 'content': 0.17946143448352814, 'timestamp': '2025-09-30 22:17:06.989568', 'step': 4177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.029496', 'step': 4177, 'epoch': 1} {'type': 'loss', 'content': 0.16170823574066162, 'timestamp': '2025-09-30 22:17:07.042280', 'step': 4178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:07.082686', 'step': 4178, 'epoch': 1} {'type': 'loss', 'content': 0.18928785622119904, 'timestamp': '2025-09-30 22:17:07.094376', 'step': 4179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:07.134570', 'step': 4179, 'epoch': 1} {'type': 'loss', 'content': 0.13765504956245422, 'timestamp': '2025-09-30 22:17:07.159365', 'step': 4180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.190812', 'step': 4180, 'epoch': 1} {'type': 'loss', 'content': 0.05191880092024803, 'timestamp': '2025-09-30 22:17:07.200262', 'step': 4181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.238956', 'step': 4181, 'epoch': 1} {'type': 'loss', 'content': 0.13132572174072266, 'timestamp': '2025-09-30 22:17:07.241816', 'step': 4182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.272151', 'step': 4182, 'epoch': 1} {'type': 'loss', 'content': 0.22100378572940826, 'timestamp': '2025-09-30 22:17:07.283250', 'step': 4183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:07.314430', 'step': 4183, 'epoch': 1} {'type': 'loss', 'content': 0.12972261011600494, 'timestamp': '2025-09-30 22:17:07.338839', 'step': 4184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:07.377028', 'step': 4184, 'epoch': 1} {'type': 'loss', 'content': 0.1514284759759903, 'timestamp': '2025-09-30 22:17:07.380397', 'step': 4185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:07.411230', 'step': 4185, 'epoch': 1} {'type': 'loss', 'content': 0.09621108323335648, 'timestamp': '2025-09-30 22:17:07.414321', 'step': 4186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.445478', 'step': 4186, 'epoch': 1} {'type': 'loss', 'content': 0.20743650197982788, 'timestamp': '2025-09-30 22:17:07.455820', 'step': 4187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.491168', 'step': 4187, 'epoch': 1} {'type': 'loss', 'content': 0.10343924909830093, 'timestamp': '2025-09-30 22:17:07.515445', 'step': 4188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:07.555179', 'step': 4188, 'epoch': 1} {'type': 'loss', 'content': 0.09775211662054062, 'timestamp': '2025-09-30 22:17:07.565035', 'step': 4189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:07.603879', 'step': 4189, 'epoch': 1} {'type': 'loss', 'content': 0.22485394775867462, 'timestamp': '2025-09-30 22:17:07.608407', 'step': 4190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.640172', 'step': 4190, 'epoch': 1} {'type': 'loss', 'content': 0.14429381489753723, 'timestamp': '2025-09-30 22:17:07.644220', 'step': 4191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:07.676102', 'step': 4191, 'epoch': 1} {'type': 'loss', 'content': 0.14644770324230194, 'timestamp': '2025-09-30 22:17:07.708796', 'step': 4192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:07.739717', 'step': 4192, 'epoch': 1} {'type': 'loss', 'content': 0.16108480095863342, 'timestamp': '2025-09-30 22:17:07.742413', 'step': 4193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:07.772923', 'step': 4193, 'epoch': 1} {'type': 'loss', 'content': 0.2178514301776886, 'timestamp': '2025-09-30 22:17:07.780439', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:17:15.493207', 'step': 4194, 'epoch': 1} {'type': 'pplx', 'content': 8869.06199287772, 'timestamp': '2025-09-30 22:17:15.502808', 'step': 4194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:15.552487', 'step': 4194, 'epoch': 1} {'type': 'loss', 'content': 0.16675671935081482, 'timestamp': '2025-09-30 22:17:15.555877', 'step': 4195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:15.589333', 'step': 4195, 'epoch': 1} {'type': 'loss', 'content': 0.12420202046632767, 'timestamp': '2025-09-30 22:17:15.614692', 'step': 4196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:15.650531', 'step': 4196, 'epoch': 1} {'type': 'loss', 'content': 0.13689765334129333, 'timestamp': '2025-09-30 22:17:15.653710', 'step': 4197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:15.690474', 'step': 4197, 'epoch': 1} {'type': 'loss', 'content': 0.125027135014534, 'timestamp': '2025-09-30 22:17:15.693458', 'step': 4198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:15.727095', 'step': 4198, 'epoch': 1} {'type': 'loss', 'content': 0.12233699858188629, 'timestamp': '2025-09-30 22:17:15.731028', 'step': 4199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:15.762044', 'step': 4199, 'epoch': 1} {'type': 'loss', 'content': 0.1740054041147232, 'timestamp': '2025-09-30 22:17:15.790568', 'step': 4200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:15.826409', 'step': 4200, 'epoch': 1} {'type': 'loss', 'content': 0.10248902440071106, 'timestamp': '2025-09-30 22:17:15.832828', 'step': 4201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:15.868676', 'step': 4201, 'epoch': 1} {'type': 'loss', 'content': 0.2414325773715973, 'timestamp': '2025-09-30 22:17:15.874886', 'step': 4202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:15.909213', 'step': 4202, 'epoch': 1} {'type': 'loss', 'content': 0.1998753845691681, 'timestamp': '2025-09-30 22:17:15.912373', 'step': 4203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:15.944284', 'step': 4203, 'epoch': 1} {'type': 'loss', 'content': 0.2534606158733368, 'timestamp': '2025-09-30 22:17:15.968783', 'step': 4204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.005155', 'step': 4204, 'epoch': 1} {'type': 'loss', 'content': 0.1419602334499359, 'timestamp': '2025-09-30 22:17:16.014010', 'step': 4205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.050762', 'step': 4205, 'epoch': 1} {'type': 'loss', 'content': 0.17391277849674225, 'timestamp': '2025-09-30 22:17:16.054489', 'step': 4206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.086019', 'step': 4206, 'epoch': 1} {'type': 'loss', 'content': 0.15433277189731598, 'timestamp': '2025-09-30 22:17:16.089549', 'step': 4207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.120272', 'step': 4207, 'epoch': 1} {'type': 'loss', 'content': 0.1274431049823761, 'timestamp': '2025-09-30 22:17:16.144241', 'step': 4208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.176384', 'step': 4208, 'epoch': 1} {'type': 'loss', 'content': 0.20802360773086548, 'timestamp': '2025-09-30 22:17:16.186824', 'step': 4209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.227160', 'step': 4209, 'epoch': 1} {'type': 'loss', 'content': 0.2037428915500641, 'timestamp': '2025-09-30 22:17:16.229354', 'step': 4210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.259897', 'step': 4210, 'epoch': 1} {'type': 'loss', 'content': 0.11928410828113556, 'timestamp': '2025-09-30 22:17:16.267401', 'step': 4211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.298589', 'step': 4211, 'epoch': 1} {'type': 'loss', 'content': 0.29210710525512695, 'timestamp': '2025-09-30 22:17:16.328187', 'step': 4212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.360125', 'step': 4212, 'epoch': 1} {'type': 'loss', 'content': 0.11690313369035721, 'timestamp': '2025-09-30 22:17:16.364982', 'step': 4213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.397303', 'step': 4213, 'epoch': 1} {'type': 'loss', 'content': 0.15353941917419434, 'timestamp': '2025-09-30 22:17:16.403792', 'step': 4214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.439508', 'step': 4214, 'epoch': 1} {'type': 'loss', 'content': 0.1870562583208084, 'timestamp': '2025-09-30 22:17:16.445239', 'step': 4215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.480753', 'step': 4215, 'epoch': 1} {'type': 'loss', 'content': 0.1677720993757248, 'timestamp': '2025-09-30 22:17:16.507501', 'step': 4216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.539946', 'step': 4216, 'epoch': 1} {'type': 'loss', 'content': 0.1645125299692154, 'timestamp': '2025-09-30 22:17:16.546789', 'step': 4217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.582387', 'step': 4217, 'epoch': 1} {'type': 'loss', 'content': 0.22336219251155853, 'timestamp': '2025-09-30 22:17:16.584977', 'step': 4218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.615540', 'step': 4218, 'epoch': 1} {'type': 'loss', 'content': 0.17583701014518738, 'timestamp': '2025-09-30 22:17:16.618481', 'step': 4219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:16.649432', 'step': 4219, 'epoch': 1} {'type': 'loss', 'content': 0.14785586297512054, 'timestamp': '2025-09-30 22:17:16.673639', 'step': 4220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.707038', 'step': 4220, 'epoch': 1} {'type': 'loss', 'content': 0.10097779333591461, 'timestamp': '2025-09-30 22:17:16.711395', 'step': 4221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.745220', 'step': 4221, 'epoch': 1} {'type': 'loss', 'content': 0.09330153465270996, 'timestamp': '2025-09-30 22:17:16.748655', 'step': 4222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.778938', 'step': 4222, 'epoch': 1} {'type': 'loss', 'content': 0.20514041185379028, 'timestamp': '2025-09-30 22:17:16.781394', 'step': 4223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.814120', 'step': 4223, 'epoch': 1} {'type': 'loss', 'content': 0.19518443942070007, 'timestamp': '2025-09-30 22:17:16.842323', 'step': 4224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.873902', 'step': 4224, 'epoch': 1} {'type': 'loss', 'content': 0.12610580027103424, 'timestamp': '2025-09-30 22:17:16.876734', 'step': 4225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:16.907735', 'step': 4225, 'epoch': 1} {'type': 'loss', 'content': 0.2066863626241684, 'timestamp': '2025-09-30 22:17:16.910860', 'step': 4226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:16.941749', 'step': 4226, 'epoch': 1} {'type': 'loss', 'content': 0.24465754628181458, 'timestamp': '2025-09-30 22:17:16.945510', 'step': 4227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:16.976819', 'step': 4227, 'epoch': 1} {'type': 'loss', 'content': 0.18861234188079834, 'timestamp': '2025-09-30 22:17:17.001521', 'step': 4228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:17.032075', 'step': 4228, 'epoch': 1} {'type': 'loss', 'content': 0.08649849891662598, 'timestamp': '2025-09-30 22:17:17.034461', 'step': 4229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:17.071766', 'step': 4229, 'epoch': 1} {'type': 'loss', 'content': 0.11904577910900116, 'timestamp': '2025-09-30 22:17:17.079877', 'step': 4230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:17.133836', 'step': 4230, 'epoch': 1} {'type': 'loss', 'content': 0.11509206891059875, 'timestamp': '2025-09-30 22:17:17.136570', 'step': 4231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:17.174272', 'step': 4231, 'epoch': 1} {'type': 'loss', 'content': 0.13421757519245148, 'timestamp': '2025-09-30 22:17:17.198325', 'step': 4232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.236556', 'step': 4232, 'epoch': 1} {'type': 'loss', 'content': 0.2112359255552292, 'timestamp': '2025-09-30 22:17:17.239362', 'step': 4233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:17.270887', 'step': 4233, 'epoch': 1} {'type': 'loss', 'content': 0.2336588203907013, 'timestamp': '2025-09-30 22:17:17.273914', 'step': 4234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:17.308771', 'step': 4234, 'epoch': 1} {'type': 'loss', 'content': 0.2845420837402344, 'timestamp': '2025-09-30 22:17:17.311648', 'step': 4235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.351910', 'step': 4235, 'epoch': 1} {'type': 'loss', 'content': 0.19251519441604614, 'timestamp': '2025-09-30 22:17:17.375862', 'step': 4236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.408275', 'step': 4236, 'epoch': 1} {'type': 'loss', 'content': 0.174441397190094, 'timestamp': '2025-09-30 22:17:17.413922', 'step': 4237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.445285', 'step': 4237, 'epoch': 1} {'type': 'loss', 'content': 0.13295955955982208, 'timestamp': '2025-09-30 22:17:17.457150', 'step': 4238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.488499', 'step': 4238, 'epoch': 1} {'type': 'loss', 'content': 0.1478564739227295, 'timestamp': '2025-09-30 22:17:17.493114', 'step': 4239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.528055', 'step': 4239, 'epoch': 1} {'type': 'loss', 'content': 0.05607575923204422, 'timestamp': '2025-09-30 22:17:17.559922', 'step': 4240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:17.591393', 'step': 4240, 'epoch': 1} {'type': 'loss', 'content': 0.13487493991851807, 'timestamp': '2025-09-30 22:17:17.594702', 'step': 4241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:17.626068', 'step': 4241, 'epoch': 1} {'type': 'loss', 'content': 0.11573256552219391, 'timestamp': '2025-09-30 22:17:17.634690', 'step': 4242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:17.669935', 'step': 4242, 'epoch': 1} {'type': 'loss', 'content': 0.22289763391017914, 'timestamp': '2025-09-30 22:17:17.672980', 'step': 4243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.704578', 'step': 4243, 'epoch': 1} {'type': 'loss', 'content': 0.16140690445899963, 'timestamp': '2025-09-30 22:17:17.729862', 'step': 4244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.760359', 'step': 4244, 'epoch': 1} {'type': 'loss', 'content': 0.13740095496177673, 'timestamp': '2025-09-30 22:17:17.766993', 'step': 4245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.800074', 'step': 4245, 'epoch': 1} {'type': 'loss', 'content': 0.1632300317287445, 'timestamp': '2025-09-30 22:17:17.803123', 'step': 4246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.837951', 'step': 4246, 'epoch': 1} {'type': 'loss', 'content': 0.24910783767700195, 'timestamp': '2025-09-30 22:17:17.841296', 'step': 4247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:17.875907', 'step': 4247, 'epoch': 1} {'type': 'loss', 'content': 0.16936524212360382, 'timestamp': '2025-09-30 22:17:17.900765', 'step': 4248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:17.936059', 'step': 4248, 'epoch': 1} {'type': 'loss', 'content': 0.24200986325740814, 'timestamp': '2025-09-30 22:17:17.939061', 'step': 4249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:17.970784', 'step': 4249, 'epoch': 1} {'type': 'loss', 'content': 0.29109346866607666, 'timestamp': '2025-09-30 22:17:17.973626', 'step': 4250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.010901', 'step': 4250, 'epoch': 1} {'type': 'loss', 'content': 0.2011195421218872, 'timestamp': '2025-09-30 22:17:18.014089', 'step': 4251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:18.046400', 'step': 4251, 'epoch': 1} {'type': 'loss', 'content': 0.15962174534797668, 'timestamp': '2025-09-30 22:17:18.073654', 'step': 4252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:18.111325', 'step': 4252, 'epoch': 1} {'type': 'loss', 'content': 0.13650117814540863, 'timestamp': '2025-09-30 22:17:18.114159', 'step': 4253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:18.146919', 'step': 4253, 'epoch': 1} {'type': 'loss', 'content': 0.17168448865413666, 'timestamp': '2025-09-30 22:17:18.149915', 'step': 4254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:18.183964', 'step': 4254, 'epoch': 1} {'type': 'loss', 'content': 0.15462128818035126, 'timestamp': '2025-09-30 22:17:18.187398', 'step': 4255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:17:18.218766', 'step': 4255, 'epoch': 1} {'type': 'loss', 'content': 0.10846517980098724, 'timestamp': '2025-09-30 22:17:18.243945', 'step': 4256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:18.276167', 'step': 4256, 'epoch': 1} {'type': 'loss', 'content': 0.20383816957473755, 'timestamp': '2025-09-30 22:17:18.282500', 'step': 4257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.320811', 'step': 4257, 'epoch': 1} {'type': 'loss', 'content': 0.27967318892478943, 'timestamp': '2025-09-30 22:17:18.329345', 'step': 4258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:18.363638', 'step': 4258, 'epoch': 1} {'type': 'loss', 'content': 0.1481916308403015, 'timestamp': '2025-09-30 22:17:18.372409', 'step': 4259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:18.403265', 'step': 4259, 'epoch': 1} {'type': 'loss', 'content': 0.1161765530705452, 'timestamp': '2025-09-30 22:17:18.431620', 'step': 4260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:18.464586', 'step': 4260, 'epoch': 1} {'type': 'loss', 'content': 0.17587910592556, 'timestamp': '2025-09-30 22:17:18.467063', 'step': 4261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.500391', 'step': 4261, 'epoch': 1} {'type': 'loss', 'content': 0.16138674318790436, 'timestamp': '2025-09-30 22:17:18.510126', 'step': 4262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:18.543848', 'step': 4262, 'epoch': 1} {'type': 'loss', 'content': 0.17510806024074554, 'timestamp': '2025-09-30 22:17:18.546637', 'step': 4263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.582799', 'step': 4263, 'epoch': 1} {'type': 'loss', 'content': 0.08963797986507416, 'timestamp': '2025-09-30 22:17:18.606788', 'step': 4264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.645419', 'step': 4264, 'epoch': 1} {'type': 'loss', 'content': 0.12189772725105286, 'timestamp': '2025-09-30 22:17:18.652721', 'step': 4265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:18.684402', 'step': 4265, 'epoch': 1} {'type': 'loss', 'content': 0.19784842431545258, 'timestamp': '2025-09-30 22:17:18.693066', 'step': 4266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:18.729440', 'step': 4266, 'epoch': 1} {'type': 'loss', 'content': 0.13349181413650513, 'timestamp': '2025-09-30 22:17:18.732200', 'step': 4267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:18.762759', 'step': 4267, 'epoch': 1} {'type': 'loss', 'content': 0.195299431681633, 'timestamp': '2025-09-30 22:17:18.795992', 'step': 4268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:18.828853', 'step': 4268, 'epoch': 1} {'type': 'loss', 'content': 0.19603796303272247, 'timestamp': '2025-09-30 22:17:18.831616', 'step': 4269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.869039', 'step': 4269, 'epoch': 1} {'type': 'loss', 'content': 0.08421582728624344, 'timestamp': '2025-09-30 22:17:18.872386', 'step': 4270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:18.903557', 'step': 4270, 'epoch': 1} {'type': 'loss', 'content': 0.14338211715221405, 'timestamp': '2025-09-30 22:17:18.907763', 'step': 4271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:18.944230', 'step': 4271, 'epoch': 1} {'type': 'loss', 'content': 0.1311587393283844, 'timestamp': '2025-09-30 22:17:18.970285', 'step': 4272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.002066', 'step': 4272, 'epoch': 1} {'type': 'loss', 'content': 0.1705833077430725, 'timestamp': '2025-09-30 22:17:19.005137', 'step': 4273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:19.035635', 'step': 4273, 'epoch': 1} {'type': 'loss', 'content': 0.23377187550067902, 'timestamp': '2025-09-30 22:17:19.041639', 'step': 4274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:19.077603', 'step': 4274, 'epoch': 1} {'type': 'loss', 'content': 0.13855528831481934, 'timestamp': '2025-09-30 22:17:19.080810', 'step': 4275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:19.111733', 'step': 4275, 'epoch': 1} {'type': 'loss', 'content': 0.13877765834331512, 'timestamp': '2025-09-30 22:17:19.142838', 'step': 4276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.173080', 'step': 4276, 'epoch': 1} {'type': 'loss', 'content': 0.11655806005001068, 'timestamp': '2025-09-30 22:17:19.180777', 'step': 4277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:19.216367', 'step': 4277, 'epoch': 1} {'type': 'loss', 'content': 0.09075581282377243, 'timestamp': '2025-09-30 22:17:19.219273', 'step': 4278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:19.256944', 'step': 4278, 'epoch': 1} {'type': 'loss', 'content': 0.15279778838157654, 'timestamp': '2025-09-30 22:17:19.263686', 'step': 4279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:19.306164', 'step': 4279, 'epoch': 1} {'type': 'loss', 'content': 0.2228962630033493, 'timestamp': '2025-09-30 22:17:19.330747', 'step': 4280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.363502', 'step': 4280, 'epoch': 1} {'type': 'loss', 'content': 0.13700631260871887, 'timestamp': '2025-09-30 22:17:19.369163', 'step': 4281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:19.400804', 'step': 4281, 'epoch': 1} {'type': 'loss', 'content': 0.09753751009702682, 'timestamp': '2025-09-30 22:17:19.404056', 'step': 4282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:19.438921', 'step': 4282, 'epoch': 1} {'type': 'loss', 'content': 0.13426269590854645, 'timestamp': '2025-09-30 22:17:19.442421', 'step': 4283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:19.474280', 'step': 4283, 'epoch': 1} {'type': 'loss', 'content': 0.19639797508716583, 'timestamp': '2025-09-30 22:17:19.501288', 'step': 4284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.533266', 'step': 4284, 'epoch': 1} {'type': 'loss', 'content': 0.16320279240608215, 'timestamp': '2025-09-30 22:17:19.537923', 'step': 4285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.573548', 'step': 4285, 'epoch': 1} {'type': 'loss', 'content': 0.18969528377056122, 'timestamp': '2025-09-30 22:17:19.578632', 'step': 4286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:19.611009', 'step': 4286, 'epoch': 1} {'type': 'loss', 'content': 0.17205344140529633, 'timestamp': '2025-09-30 22:17:19.615502', 'step': 4287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.654096', 'step': 4287, 'epoch': 1} {'type': 'loss', 'content': 0.15777158737182617, 'timestamp': '2025-09-30 22:17:19.679229', 'step': 4288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:19.722362', 'step': 4288, 'epoch': 1} {'type': 'loss', 'content': 0.19112969934940338, 'timestamp': '2025-09-30 22:17:19.726080', 'step': 4289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.769341', 'step': 4289, 'epoch': 1} {'type': 'loss', 'content': 0.22938591241836548, 'timestamp': '2025-09-30 22:17:19.771862', 'step': 4290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:19.806283', 'step': 4290, 'epoch': 1} {'type': 'loss', 'content': 0.13404563069343567, 'timestamp': '2025-09-30 22:17:19.810012', 'step': 4291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:19.844536', 'step': 4291, 'epoch': 1} {'type': 'loss', 'content': 0.164933443069458, 'timestamp': '2025-09-30 22:17:19.872648', 'step': 4292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:19.904711', 'step': 4292, 'epoch': 1} {'type': 'loss', 'content': 0.2766839563846588, 'timestamp': '2025-09-30 22:17:19.910317', 'step': 4293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:19.949540', 'step': 4293, 'epoch': 1} {'type': 'loss', 'content': 0.12478184700012207, 'timestamp': '2025-09-30 22:17:19.952058', 'step': 4294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:19.991776', 'step': 4294, 'epoch': 1} {'type': 'loss', 'content': 0.13053005933761597, 'timestamp': '2025-09-30 22:17:20.003873', 'step': 4295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:20.040576', 'step': 4295, 'epoch': 1} {'type': 'loss', 'content': 0.16373476386070251, 'timestamp': '2025-09-30 22:17:20.069645', 'step': 4296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.107918', 'step': 4296, 'epoch': 1} {'type': 'loss', 'content': 0.1720850020647049, 'timestamp': '2025-09-30 22:17:20.111475', 'step': 4297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:20.144270', 'step': 4297, 'epoch': 1} {'type': 'loss', 'content': 0.11267773061990738, 'timestamp': '2025-09-30 22:17:20.150766', 'step': 4298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.198105', 'step': 4298, 'epoch': 1} {'type': 'loss', 'content': 0.25246182084083557, 'timestamp': '2025-09-30 22:17:20.206171', 'step': 4299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.238100', 'step': 4299, 'epoch': 1} {'type': 'loss', 'content': 0.15391378104686737, 'timestamp': '2025-09-30 22:17:20.269908', 'step': 4300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:17:20.301194', 'step': 4300, 'epoch': 1} {'type': 'loss', 'content': 0.19473567605018616, 'timestamp': '2025-09-30 22:17:20.309627', 'step': 4301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:20.346523', 'step': 4301, 'epoch': 1} {'type': 'loss', 'content': 0.10595668107271194, 'timestamp': '2025-09-30 22:17:20.350694', 'step': 4302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.386100', 'step': 4302, 'epoch': 1} {'type': 'loss', 'content': 0.1722991168498993, 'timestamp': '2025-09-30 22:17:20.393639', 'step': 4303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.433769', 'step': 4303, 'epoch': 1} {'type': 'loss', 'content': 0.1193193718791008, 'timestamp': '2025-09-30 22:17:20.457833', 'step': 4304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.502903', 'step': 4304, 'epoch': 1} {'type': 'loss', 'content': 0.17678998410701752, 'timestamp': '2025-09-30 22:17:20.506389', 'step': 4305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:20.545766', 'step': 4305, 'epoch': 1} {'type': 'loss', 'content': 0.09490089863538742, 'timestamp': '2025-09-30 22:17:20.554555', 'step': 4306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:20.590757', 'step': 4306, 'epoch': 1} {'type': 'loss', 'content': 0.12587353587150574, 'timestamp': '2025-09-30 22:17:20.597695', 'step': 4307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:20.634794', 'step': 4307, 'epoch': 1} {'type': 'loss', 'content': 0.1741771399974823, 'timestamp': '2025-09-30 22:17:20.659488', 'step': 4308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.696730', 'step': 4308, 'epoch': 1} {'type': 'loss', 'content': 0.15285852551460266, 'timestamp': '2025-09-30 22:17:20.703697', 'step': 4309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:20.734503', 'step': 4309, 'epoch': 1} {'type': 'loss', 'content': 0.1837032288312912, 'timestamp': '2025-09-30 22:17:20.740939', 'step': 4310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:20.776853', 'step': 4310, 'epoch': 1} {'type': 'loss', 'content': 0.06545843929052353, 'timestamp': '2025-09-30 22:17:20.785697', 'step': 4311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.823881', 'step': 4311, 'epoch': 1} {'type': 'loss', 'content': 0.1845182180404663, 'timestamp': '2025-09-30 22:17:20.853414', 'step': 4312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.888162', 'step': 4312, 'epoch': 1} {'type': 'loss', 'content': 0.11765541136264801, 'timestamp': '2025-09-30 22:17:20.895715', 'step': 4313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:20.926148', 'step': 4313, 'epoch': 1} {'type': 'loss', 'content': 0.18952925503253937, 'timestamp': '2025-09-30 22:17:20.930853', 'step': 4314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:20.961639', 'step': 4314, 'epoch': 1} {'type': 'loss', 'content': 0.22555503249168396, 'timestamp': '2025-09-30 22:17:20.964443', 'step': 4315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.007764', 'step': 4315, 'epoch': 1} {'type': 'loss', 'content': 0.1190943792462349, 'timestamp': '2025-09-30 22:17:21.033472', 'step': 4316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:21.068709', 'step': 4316, 'epoch': 1} {'type': 'loss', 'content': 0.28385692834854126, 'timestamp': '2025-09-30 22:17:21.072654', 'step': 4317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:21.109231', 'step': 4317, 'epoch': 1} {'type': 'loss', 'content': 0.11022750288248062, 'timestamp': '2025-09-30 22:17:21.112345', 'step': 4318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.152261', 'step': 4318, 'epoch': 1} {'type': 'loss', 'content': 0.14533844590187073, 'timestamp': '2025-09-30 22:17:21.160538', 'step': 4319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.198203', 'step': 4319, 'epoch': 1} {'type': 'loss', 'content': 0.21607936918735504, 'timestamp': '2025-09-30 22:17:21.222836', 'step': 4320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.261602', 'step': 4320, 'epoch': 1} {'type': 'loss', 'content': 0.12683092057704926, 'timestamp': '2025-09-30 22:17:21.265538', 'step': 4321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.297860', 'step': 4321, 'epoch': 1} {'type': 'loss', 'content': 0.1964462697505951, 'timestamp': '2025-09-30 22:17:21.300834', 'step': 4322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.331730', 'step': 4322, 'epoch': 1} {'type': 'loss', 'content': 0.21863269805908203, 'timestamp': '2025-09-30 22:17:21.339922', 'step': 4323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:21.375506', 'step': 4323, 'epoch': 1} {'type': 'loss', 'content': 0.21984444558620453, 'timestamp': '2025-09-30 22:17:21.399416', 'step': 4324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:21.436652', 'step': 4324, 'epoch': 1} {'type': 'loss', 'content': 0.10883674025535583, 'timestamp': '2025-09-30 22:17:21.441187', 'step': 4325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.471966', 'step': 4325, 'epoch': 1} {'type': 'loss', 'content': 0.1373673677444458, 'timestamp': '2025-09-30 22:17:21.474915', 'step': 4326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.513271', 'step': 4326, 'epoch': 1} {'type': 'loss', 'content': 0.3019162118434906, 'timestamp': '2025-09-30 22:17:21.521215', 'step': 4327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.556338', 'step': 4327, 'epoch': 1} {'type': 'loss', 'content': 0.1484253704547882, 'timestamp': '2025-09-30 22:17:21.581546', 'step': 4328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.618806', 'step': 4328, 'epoch': 1} {'type': 'loss', 'content': 0.14283984899520874, 'timestamp': '2025-09-30 22:17:21.627301', 'step': 4329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:21.660716', 'step': 4329, 'epoch': 1} {'type': 'loss', 'content': 0.19450415670871735, 'timestamp': '2025-09-30 22:17:21.664334', 'step': 4330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.695825', 'step': 4330, 'epoch': 1} {'type': 'loss', 'content': 0.18694822490215302, 'timestamp': '2025-09-30 22:17:21.698663', 'step': 4331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.735155', 'step': 4331, 'epoch': 1} {'type': 'loss', 'content': 0.0982944518327713, 'timestamp': '2025-09-30 22:17:21.759500', 'step': 4332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.789533', 'step': 4332, 'epoch': 1} {'type': 'loss', 'content': 0.15313470363616943, 'timestamp': '2025-09-30 22:17:21.792602', 'step': 4333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.830277', 'step': 4333, 'epoch': 1} {'type': 'loss', 'content': 0.0756153091788292, 'timestamp': '2025-09-30 22:17:21.833864', 'step': 4334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.864626', 'step': 4334, 'epoch': 1} {'type': 'loss', 'content': 0.11460131406784058, 'timestamp': '2025-09-30 22:17:21.867341', 'step': 4335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:21.903053', 'step': 4335, 'epoch': 1} {'type': 'loss', 'content': 0.19376879930496216, 'timestamp': '2025-09-30 22:17:21.930296', 'step': 4336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:21.961016', 'step': 4336, 'epoch': 1} {'type': 'loss', 'content': 0.13316169381141663, 'timestamp': '2025-09-30 22:17:21.963882', 'step': 4337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:21.995099', 'step': 4337, 'epoch': 1} {'type': 'loss', 'content': 0.19067318737506866, 'timestamp': '2025-09-30 22:17:22.001272', 'step': 4338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.038344', 'step': 4338, 'epoch': 1} {'type': 'loss', 'content': 0.165121927857399, 'timestamp': '2025-09-30 22:17:22.043269', 'step': 4339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.074562', 'step': 4339, 'epoch': 1} {'type': 'loss', 'content': 0.12287265062332153, 'timestamp': '2025-09-30 22:17:22.099721', 'step': 4340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:22.135905', 'step': 4340, 'epoch': 1} {'type': 'loss', 'content': 0.13753460347652435, 'timestamp': '2025-09-30 22:17:22.139348', 'step': 4341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:22.176584', 'step': 4341, 'epoch': 1} {'type': 'loss', 'content': 0.15038013458251953, 'timestamp': '2025-09-30 22:17:22.181038', 'step': 4342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.212890', 'step': 4342, 'epoch': 1} {'type': 'loss', 'content': 0.17652437090873718, 'timestamp': '2025-09-30 22:17:22.228415', 'step': 4343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:22.261366', 'step': 4343, 'epoch': 1} {'type': 'loss', 'content': 0.1945248693227768, 'timestamp': '2025-09-30 22:17:22.286039', 'step': 4344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.317506', 'step': 4344, 'epoch': 1} {'type': 'loss', 'content': 0.09470008313655853, 'timestamp': '2025-09-30 22:17:22.327810', 'step': 4345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:22.358493', 'step': 4345, 'epoch': 1} {'type': 'loss', 'content': 0.14021116495132446, 'timestamp': '2025-09-30 22:17:22.373148', 'step': 4346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:22.404974', 'step': 4346, 'epoch': 1} {'type': 'loss', 'content': 0.21160699427127838, 'timestamp': '2025-09-30 22:17:22.415913', 'step': 4347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:22.446920', 'step': 4347, 'epoch': 1} {'type': 'loss', 'content': 0.18869540095329285, 'timestamp': '2025-09-30 22:17:22.471675', 'step': 4348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:22.502914', 'step': 4348, 'epoch': 1} {'type': 'loss', 'content': 0.16664227843284607, 'timestamp': '2025-09-30 22:17:22.512099', 'step': 4349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.542830', 'step': 4349, 'epoch': 1} {'type': 'loss', 'content': 0.22039255499839783, 'timestamp': '2025-09-30 22:17:22.551486', 'step': 4350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:22.583161', 'step': 4350, 'epoch': 1} {'type': 'loss', 'content': 0.13548710942268372, 'timestamp': '2025-09-30 22:17:22.586921', 'step': 4351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:22.618046', 'step': 4351, 'epoch': 1} {'type': 'loss', 'content': 0.18453682959079742, 'timestamp': '2025-09-30 22:17:22.645034', 'step': 4352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:22.681772', 'step': 4352, 'epoch': 1} {'type': 'loss', 'content': 0.11214108020067215, 'timestamp': '2025-09-30 22:17:22.690346', 'step': 4353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.727298', 'step': 4353, 'epoch': 1} {'type': 'loss', 'content': 0.18260937929153442, 'timestamp': '2025-09-30 22:17:22.734417', 'step': 4354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:22.765527', 'step': 4354, 'epoch': 1} {'type': 'loss', 'content': 0.2372792512178421, 'timestamp': '2025-09-30 22:17:22.768534', 'step': 4355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:22.810531', 'step': 4355, 'epoch': 1} {'type': 'loss', 'content': 0.061982251703739166, 'timestamp': '2025-09-30 22:17:22.835098', 'step': 4356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.873808', 'step': 4356, 'epoch': 1} {'type': 'loss', 'content': 0.24167878925800323, 'timestamp': '2025-09-30 22:17:22.885788', 'step': 4357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.916731', 'step': 4357, 'epoch': 1} {'type': 'loss', 'content': 0.1836264729499817, 'timestamp': '2025-09-30 22:17:22.920955', 'step': 4358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:22.953272', 'step': 4358, 'epoch': 1} {'type': 'loss', 'content': 0.2307152897119522, 'timestamp': '2025-09-30 22:17:22.961735', 'step': 4359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:23.000315', 'step': 4359, 'epoch': 1} {'type': 'loss', 'content': 0.049767255783081055, 'timestamp': '2025-09-30 22:17:23.032421', 'step': 4360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:23.064348', 'step': 4360, 'epoch': 1} {'type': 'loss', 'content': 0.10608187317848206, 'timestamp': '2025-09-30 22:17:23.068604', 'step': 4361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:23.099858', 'step': 4361, 'epoch': 1} {'type': 'loss', 'content': 0.23465898633003235, 'timestamp': '2025-09-30 22:17:23.103250', 'step': 4362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:23.135438', 'step': 4362, 'epoch': 1} {'type': 'loss', 'content': 0.08848890662193298, 'timestamp': '2025-09-30 22:17:23.140071', 'step': 4363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:23.171422', 'step': 4363, 'epoch': 1} {'type': 'loss', 'content': 0.17096398770809174, 'timestamp': '2025-09-30 22:17:23.204278', 'step': 4364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:23.236835', 'step': 4364, 'epoch': 1} {'type': 'loss', 'content': 0.1588738113641739, 'timestamp': '2025-09-30 22:17:23.244426', 'step': 4365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.284163', 'step': 4365, 'epoch': 1} {'type': 'loss', 'content': 0.15544643998146057, 'timestamp': '2025-09-30 22:17:23.288322', 'step': 4366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.319845', 'step': 4366, 'epoch': 1} {'type': 'loss', 'content': 0.13929028809070587, 'timestamp': '2025-09-30 22:17:23.324346', 'step': 4367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:23.356636', 'step': 4367, 'epoch': 1} {'type': 'loss', 'content': 0.29551172256469727, 'timestamp': '2025-09-30 22:17:23.381989', 'step': 4368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.416103', 'step': 4368, 'epoch': 1} {'type': 'loss', 'content': 0.13948757946491241, 'timestamp': '2025-09-30 22:17:23.423142', 'step': 4369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:23.460266', 'step': 4369, 'epoch': 1} {'type': 'loss', 'content': 0.28922638297080994, 'timestamp': '2025-09-30 22:17:23.467870', 'step': 4370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.506092', 'step': 4370, 'epoch': 1} {'type': 'loss', 'content': 0.17145778238773346, 'timestamp': '2025-09-30 22:17:23.508668', 'step': 4371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:23.540737', 'step': 4371, 'epoch': 1} {'type': 'loss', 'content': 0.23099493980407715, 'timestamp': '2025-09-30 22:17:23.565166', 'step': 4372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.597674', 'step': 4372, 'epoch': 1} {'type': 'loss', 'content': 0.15181052684783936, 'timestamp': '2025-09-30 22:17:23.602092', 'step': 4373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:23.634127', 'step': 4373, 'epoch': 1} {'type': 'loss', 'content': 0.26375335454940796, 'timestamp': '2025-09-30 22:17:23.642241', 'step': 4374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:23.673536', 'step': 4374, 'epoch': 1} {'type': 'loss', 'content': 0.26085588335990906, 'timestamp': '2025-09-30 22:17:23.677988', 'step': 4375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:23.709986', 'step': 4375, 'epoch': 1} {'type': 'loss', 'content': 0.13021643459796906, 'timestamp': '2025-09-30 22:17:23.752194', 'step': 4376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:23.783030', 'step': 4376, 'epoch': 1} {'type': 'loss', 'content': 0.09676887094974518, 'timestamp': '2025-09-30 22:17:23.795455', 'step': 4377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.836954', 'step': 4377, 'epoch': 1} {'type': 'loss', 'content': 0.12352918833494186, 'timestamp': '2025-09-30 22:17:23.840593', 'step': 4378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:23.874540', 'step': 4378, 'epoch': 1} {'type': 'loss', 'content': 0.19302350282669067, 'timestamp': '2025-09-30 22:17:23.877558', 'step': 4379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:23.909593', 'step': 4379, 'epoch': 1} {'type': 'loss', 'content': 0.12281050533056259, 'timestamp': '2025-09-30 22:17:23.933696', 'step': 4380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:23.963550', 'step': 4380, 'epoch': 1} {'type': 'loss', 'content': 0.24146375060081482, 'timestamp': '2025-09-30 22:17:23.965737', 'step': 4381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:23.995948', 'step': 4381, 'epoch': 1} {'type': 'loss', 'content': 0.23604507744312286, 'timestamp': '2025-09-30 22:17:23.999005', 'step': 4382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.029307', 'step': 4382, 'epoch': 1} {'type': 'loss', 'content': 0.15026958286762238, 'timestamp': '2025-09-30 22:17:24.031728', 'step': 4383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.061877', 'step': 4383, 'epoch': 1} {'type': 'loss', 'content': 0.133831188082695, 'timestamp': '2025-09-30 22:17:24.085482', 'step': 4384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.117916', 'step': 4384, 'epoch': 1} {'type': 'loss', 'content': 0.18237227201461792, 'timestamp': '2025-09-30 22:17:24.119960', 'step': 4385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.150222', 'step': 4385, 'epoch': 1} {'type': 'loss', 'content': 0.16482900083065033, 'timestamp': '2025-09-30 22:17:24.152567', 'step': 4386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.189333', 'step': 4386, 'epoch': 1} {'type': 'loss', 'content': 0.19012826681137085, 'timestamp': '2025-09-30 22:17:24.192227', 'step': 4387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.222815', 'step': 4387, 'epoch': 1} {'type': 'loss', 'content': 0.08322568237781525, 'timestamp': '2025-09-30 22:17:24.247348', 'step': 4388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.277840', 'step': 4388, 'epoch': 1} {'type': 'loss', 'content': 0.1503734290599823, 'timestamp': '2025-09-30 22:17:24.280727', 'step': 4389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.311529', 'step': 4389, 'epoch': 1} {'type': 'loss', 'content': 0.14500804245471954, 'timestamp': '2025-09-30 22:17:24.314458', 'step': 4390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:24.346596', 'step': 4390, 'epoch': 1} {'type': 'loss', 'content': 0.16167880594730377, 'timestamp': '2025-09-30 22:17:24.350249', 'step': 4391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:24.381576', 'step': 4391, 'epoch': 1} {'type': 'loss', 'content': 0.17663772404193878, 'timestamp': '2025-09-30 22:17:24.408352', 'step': 4392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.438421', 'step': 4392, 'epoch': 1} {'type': 'loss', 'content': 0.10200808942317963, 'timestamp': '2025-09-30 22:17:24.440924', 'step': 4393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.479472', 'step': 4393, 'epoch': 1} {'type': 'loss', 'content': 0.15470118820667267, 'timestamp': '2025-09-30 22:17:24.484842', 'step': 4394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.517847', 'step': 4394, 'epoch': 1} {'type': 'loss', 'content': 0.1424923986196518, 'timestamp': '2025-09-30 22:17:24.520227', 'step': 4395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.551106', 'step': 4395, 'epoch': 1} {'type': 'loss', 'content': 0.2054748833179474, 'timestamp': '2025-09-30 22:17:24.578231', 'step': 4396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:24.609323', 'step': 4396, 'epoch': 1} {'type': 'loss', 'content': 0.17444488406181335, 'timestamp': '2025-09-30 22:17:24.614939', 'step': 4397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.650885', 'step': 4397, 'epoch': 1} {'type': 'loss', 'content': 0.15616975724697113, 'timestamp': '2025-09-30 22:17:24.657667', 'step': 4398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.692323', 'step': 4398, 'epoch': 1} {'type': 'loss', 'content': 0.12503917515277863, 'timestamp': '2025-09-30 22:17:24.699123', 'step': 4399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.729511', 'step': 4399, 'epoch': 1} {'type': 'loss', 'content': 0.16980354487895966, 'timestamp': '2025-09-30 22:17:24.753766', 'step': 4400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.783767', 'step': 4400, 'epoch': 1} {'type': 'loss', 'content': 0.1445835530757904, 'timestamp': '2025-09-30 22:17:24.786484', 'step': 4401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:24.820650', 'step': 4401, 'epoch': 1} {'type': 'loss', 'content': 0.2607557773590088, 'timestamp': '2025-09-30 22:17:24.823753', 'step': 4402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:24.854765', 'step': 4402, 'epoch': 1} {'type': 'loss', 'content': 0.2154826521873474, 'timestamp': '2025-09-30 22:17:24.858313', 'step': 4403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:24.893553', 'step': 4403, 'epoch': 1} {'type': 'loss', 'content': 0.1671563684940338, 'timestamp': '2025-09-30 22:17:24.922835', 'step': 4404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:24.959779', 'step': 4404, 'epoch': 1} {'type': 'loss', 'content': 0.08589620888233185, 'timestamp': '2025-09-30 22:17:24.962423', 'step': 4405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:24.992920', 'step': 4405, 'epoch': 1} {'type': 'loss', 'content': 0.16444696485996246, 'timestamp': '2025-09-30 22:17:25.001454', 'step': 4406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.036243', 'step': 4406, 'epoch': 1} {'type': 'loss', 'content': 0.08050155639648438, 'timestamp': '2025-09-30 22:17:25.044143', 'step': 4407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:25.079710', 'step': 4407, 'epoch': 1} {'type': 'loss', 'content': 0.15163856744766235, 'timestamp': '2025-09-30 22:17:25.104087', 'step': 4408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:25.134745', 'step': 4408, 'epoch': 1} {'type': 'loss', 'content': 0.09900490939617157, 'timestamp': '2025-09-30 22:17:25.142900', 'step': 4409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:25.179543', 'step': 4409, 'epoch': 1} {'type': 'loss', 'content': 0.17928524315357208, 'timestamp': '2025-09-30 22:17:25.182822', 'step': 4410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:25.218204', 'step': 4410, 'epoch': 1} {'type': 'loss', 'content': 0.1990097165107727, 'timestamp': '2025-09-30 22:17:25.224882', 'step': 4411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:25.255966', 'step': 4411, 'epoch': 1} {'type': 'loss', 'content': 0.15152619779109955, 'timestamp': '2025-09-30 22:17:25.285597', 'step': 4412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:25.319561', 'step': 4412, 'epoch': 1} {'type': 'loss', 'content': 0.15512417256832123, 'timestamp': '2025-09-30 22:17:25.322352', 'step': 4413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.354064', 'step': 4413, 'epoch': 1} {'type': 'loss', 'content': 0.2416110336780548, 'timestamp': '2025-09-30 22:17:25.357339', 'step': 4414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:25.392413', 'step': 4414, 'epoch': 1} {'type': 'loss', 'content': 0.089664526283741, 'timestamp': '2025-09-30 22:17:25.395502', 'step': 4415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.426356', 'step': 4415, 'epoch': 1} {'type': 'loss', 'content': 0.16699962317943573, 'timestamp': '2025-09-30 22:17:25.456742', 'step': 4416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.490914', 'step': 4416, 'epoch': 1} {'type': 'loss', 'content': 0.11474306136369705, 'timestamp': '2025-09-30 22:17:25.498023', 'step': 4417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.530499', 'step': 4417, 'epoch': 1} {'type': 'loss', 'content': 0.1738218367099762, 'timestamp': '2025-09-30 22:17:25.537260', 'step': 4418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:25.573525', 'step': 4418, 'epoch': 1} {'type': 'loss', 'content': 0.12341340631246567, 'timestamp': '2025-09-30 22:17:25.579434', 'step': 4419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.616160', 'step': 4419, 'epoch': 1} {'type': 'loss', 'content': 0.19080695509910583, 'timestamp': '2025-09-30 22:17:25.640765', 'step': 4420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.675059', 'step': 4420, 'epoch': 1} {'type': 'loss', 'content': 0.15238381922245026, 'timestamp': '2025-09-30 22:17:25.677924', 'step': 4421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:25.709439', 'step': 4421, 'epoch': 1} {'type': 'loss', 'content': 0.19678008556365967, 'timestamp': '2025-09-30 22:17:25.716099', 'step': 4422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.751786', 'step': 4422, 'epoch': 1} {'type': 'loss', 'content': 0.253206342458725, 'timestamp': '2025-09-30 22:17:25.754668', 'step': 4423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:25.791165', 'step': 4423, 'epoch': 1} {'type': 'loss', 'content': 0.14441151916980743, 'timestamp': '2025-09-30 22:17:25.815147', 'step': 4424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:25.844656', 'step': 4424, 'epoch': 1} {'type': 'loss', 'content': 0.1816052943468094, 'timestamp': '2025-09-30 22:17:25.847854', 'step': 4425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.881051', 'step': 4425, 'epoch': 1} {'type': 'loss', 'content': 0.19210653007030487, 'timestamp': '2025-09-30 22:17:25.888199', 'step': 4426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:25.919319', 'step': 4426, 'epoch': 1} {'type': 'loss', 'content': 0.17868544161319733, 'timestamp': '2025-09-30 22:17:25.928189', 'step': 4427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:25.959324', 'step': 4427, 'epoch': 1} {'type': 'loss', 'content': 0.08679285645484924, 'timestamp': '2025-09-30 22:17:25.983346', 'step': 4428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.014098', 'step': 4428, 'epoch': 1} {'type': 'loss', 'content': 0.19440887868404388, 'timestamp': '2025-09-30 22:17:26.020325', 'step': 4429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.054802', 'step': 4429, 'epoch': 1} {'type': 'loss', 'content': 0.15804848074913025, 'timestamp': '2025-09-30 22:17:26.063055', 'step': 4430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:26.093448', 'step': 4430, 'epoch': 1} {'type': 'loss', 'content': 0.16245217621326447, 'timestamp': '2025-09-30 22:17:26.101354', 'step': 4431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.133099', 'step': 4431, 'epoch': 1} {'type': 'loss', 'content': 0.24669840931892395, 'timestamp': '2025-09-30 22:17:26.158283', 'step': 4432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.189336', 'step': 4432, 'epoch': 1} {'type': 'loss', 'content': 0.17998813092708588, 'timestamp': '2025-09-30 22:17:26.192910', 'step': 4433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.223989', 'step': 4433, 'epoch': 1} {'type': 'loss', 'content': 0.1544371098279953, 'timestamp': '2025-09-30 22:17:26.227432', 'step': 4434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.258307', 'step': 4434, 'epoch': 1} {'type': 'loss', 'content': 0.2766171097755432, 'timestamp': '2025-09-30 22:17:26.261024', 'step': 4435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.292098', 'step': 4435, 'epoch': 1} {'type': 'loss', 'content': 0.06877094507217407, 'timestamp': '2025-09-30 22:17:26.322467', 'step': 4436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.357679', 'step': 4436, 'epoch': 1} {'type': 'loss', 'content': 0.221888467669487, 'timestamp': '2025-09-30 22:17:26.364714', 'step': 4437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.395015', 'step': 4437, 'epoch': 1} {'type': 'loss', 'content': 0.11389745771884918, 'timestamp': '2025-09-30 22:17:26.401812', 'step': 4438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:26.437523', 'step': 4438, 'epoch': 1} {'type': 'loss', 'content': 0.1551576405763626, 'timestamp': '2025-09-30 22:17:26.440965', 'step': 4439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.472326', 'step': 4439, 'epoch': 1} {'type': 'loss', 'content': 0.144651398062706, 'timestamp': '2025-09-30 22:17:26.496332', 'step': 4440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:26.526817', 'step': 4440, 'epoch': 1} {'type': 'loss', 'content': 0.3670154809951782, 'timestamp': '2025-09-30 22:17:26.529249', 'step': 4441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.559297', 'step': 4441, 'epoch': 1} {'type': 'loss', 'content': 0.14154884219169617, 'timestamp': '2025-09-30 22:17:26.562363', 'step': 4442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.592502', 'step': 4442, 'epoch': 1} {'type': 'loss', 'content': 0.19360387325286865, 'timestamp': '2025-09-30 22:17:26.594606', 'step': 4443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.624705', 'step': 4443, 'epoch': 1} {'type': 'loss', 'content': 0.2385268360376358, 'timestamp': '2025-09-30 22:17:26.648485', 'step': 4444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:26.678737', 'step': 4444, 'epoch': 1} {'type': 'loss', 'content': 0.1587895154953003, 'timestamp': '2025-09-30 22:17:26.680964', 'step': 4445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.712394', 'step': 4445, 'epoch': 1} {'type': 'loss', 'content': 0.12140552699565887, 'timestamp': '2025-09-30 22:17:26.715082', 'step': 4446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:26.746325', 'step': 4446, 'epoch': 1} {'type': 'loss', 'content': 0.09298750758171082, 'timestamp': '2025-09-30 22:17:26.748609', 'step': 4447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.778984', 'step': 4447, 'epoch': 1} {'type': 'loss', 'content': 0.21457095444202423, 'timestamp': '2025-09-30 22:17:26.802735', 'step': 4448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:26.833466', 'step': 4448, 'epoch': 1} {'type': 'loss', 'content': 0.09329892694950104, 'timestamp': '2025-09-30 22:17:26.836106', 'step': 4449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:26.876610', 'step': 4449, 'epoch': 1} {'type': 'loss', 'content': 0.2717982828617096, 'timestamp': '2025-09-30 22:17:26.878894', 'step': 4450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:26.909547', 'step': 4450, 'epoch': 1} {'type': 'loss', 'content': 0.20180970430374146, 'timestamp': '2025-09-30 22:17:26.916057', 'step': 4451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:26.949161', 'step': 4451, 'epoch': 1} {'type': 'loss', 'content': 0.15519262850284576, 'timestamp': '2025-09-30 22:17:26.973215', 'step': 4452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.003428', 'step': 4452, 'epoch': 1} {'type': 'loss', 'content': 0.14717689156532288, 'timestamp': '2025-09-30 22:17:27.005759', 'step': 4453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.036287', 'step': 4453, 'epoch': 1} {'type': 'loss', 'content': 0.1497848778963089, 'timestamp': '2025-09-30 22:17:27.038787', 'step': 4454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:27.070011', 'step': 4454, 'epoch': 1} {'type': 'loss', 'content': 0.2454475611448288, 'timestamp': '2025-09-30 22:17:27.072257', 'step': 4455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.102504', 'step': 4455, 'epoch': 1} {'type': 'loss', 'content': 0.12072605639696121, 'timestamp': '2025-09-30 22:17:27.126206', 'step': 4456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.158829', 'step': 4456, 'epoch': 1} {'type': 'loss', 'content': 0.08071970194578171, 'timestamp': '2025-09-30 22:17:27.161460', 'step': 4457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:27.192301', 'step': 4457, 'epoch': 1} {'type': 'loss', 'content': 0.14832454919815063, 'timestamp': '2025-09-30 22:17:27.194730', 'step': 4458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:27.225914', 'step': 4458, 'epoch': 1} {'type': 'loss', 'content': 0.08925069123506546, 'timestamp': '2025-09-30 22:17:27.228113', 'step': 4459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.260597', 'step': 4459, 'epoch': 1} {'type': 'loss', 'content': 0.20054610073566437, 'timestamp': '2025-09-30 22:17:27.284540', 'step': 4460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.315529', 'step': 4460, 'epoch': 1} {'type': 'loss', 'content': 0.16459853947162628, 'timestamp': '2025-09-30 22:17:27.317829', 'step': 4461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.348744', 'step': 4461, 'epoch': 1} {'type': 'loss', 'content': 0.06805930286645889, 'timestamp': '2025-09-30 22:17:27.351073', 'step': 4462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.381786', 'step': 4462, 'epoch': 1} {'type': 'loss', 'content': 0.19303357601165771, 'timestamp': '2025-09-30 22:17:27.384179', 'step': 4463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.414839', 'step': 4463, 'epoch': 1} {'type': 'loss', 'content': 0.22864322364330292, 'timestamp': '2025-09-30 22:17:27.443942', 'step': 4464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.474388', 'step': 4464, 'epoch': 1} {'type': 'loss', 'content': 0.12403232604265213, 'timestamp': '2025-09-30 22:17:27.476808', 'step': 4465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.507872', 'step': 4465, 'epoch': 1} {'type': 'loss', 'content': 0.19959449768066406, 'timestamp': '2025-09-30 22:17:27.510228', 'step': 4466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:27.540236', 'step': 4466, 'epoch': 1} {'type': 'loss', 'content': 0.16726012527942657, 'timestamp': '2025-09-30 22:17:27.542423', 'step': 4467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:17:27.574073', 'step': 4467, 'epoch': 1} {'type': 'loss', 'content': 0.13380829989910126, 'timestamp': '2025-09-30 22:17:27.602239', 'step': 4468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:27.632327', 'step': 4468, 'epoch': 1} {'type': 'loss', 'content': 0.10287006944417953, 'timestamp': '2025-09-30 22:17:27.635269', 'step': 4469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:27.667941', 'step': 4469, 'epoch': 1} {'type': 'loss', 'content': 0.1427946835756302, 'timestamp': '2025-09-30 22:17:27.670516', 'step': 4470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:27.702635', 'step': 4470, 'epoch': 1} {'type': 'loss', 'content': 0.2389269769191742, 'timestamp': '2025-09-30 22:17:27.705542', 'step': 4471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:27.736480', 'step': 4471, 'epoch': 1} {'type': 'loss', 'content': 0.12219858914613724, 'timestamp': '2025-09-30 22:17:27.761170', 'step': 4472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.792135', 'step': 4472, 'epoch': 1} {'type': 'loss', 'content': 0.21528680622577667, 'timestamp': '2025-09-30 22:17:27.794420', 'step': 4473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:27.824355', 'step': 4473, 'epoch': 1} {'type': 'loss', 'content': 0.07524442672729492, 'timestamp': '2025-09-30 22:17:27.826609', 'step': 4474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:27.857508', 'step': 4474, 'epoch': 1} {'type': 'loss', 'content': 0.2168540358543396, 'timestamp': '2025-09-30 22:17:27.860534', 'step': 4475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:27.892608', 'step': 4475, 'epoch': 1} {'type': 'loss', 'content': 0.16420115530490875, 'timestamp': '2025-09-30 22:17:27.916392', 'step': 4476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.946321', 'step': 4476, 'epoch': 1} {'type': 'loss', 'content': 0.11988488584756851, 'timestamp': '2025-09-30 22:17:27.948774', 'step': 4477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:27.979662', 'step': 4477, 'epoch': 1} {'type': 'loss', 'content': 0.10411428660154343, 'timestamp': '2025-09-30 22:17:27.982267', 'step': 4478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.012975', 'step': 4478, 'epoch': 1} {'type': 'loss', 'content': 0.08936646580696106, 'timestamp': '2025-09-30 22:17:28.015345', 'step': 4479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:28.046255', 'step': 4479, 'epoch': 1} {'type': 'loss', 'content': 0.17605344951152802, 'timestamp': '2025-09-30 22:17:28.069784', 'step': 4480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.105847', 'step': 4480, 'epoch': 1} {'type': 'loss', 'content': 0.1879156231880188, 'timestamp': '2025-09-30 22:17:28.114882', 'step': 4481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.145319', 'step': 4481, 'epoch': 1} {'type': 'loss', 'content': 0.15962909162044525, 'timestamp': '2025-09-30 22:17:28.147785', 'step': 4482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.178493', 'step': 4482, 'epoch': 1} {'type': 'loss', 'content': 0.1130322813987732, 'timestamp': '2025-09-30 22:17:28.181162', 'step': 4483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.211547', 'step': 4483, 'epoch': 1} {'type': 'loss', 'content': 0.14054575562477112, 'timestamp': '2025-09-30 22:17:28.235074', 'step': 4484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.264881', 'step': 4484, 'epoch': 1} {'type': 'loss', 'content': 0.08603125810623169, 'timestamp': '2025-09-30 22:17:28.267250', 'step': 4485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.296914', 'step': 4485, 'epoch': 1} {'type': 'loss', 'content': 0.21873565018177032, 'timestamp': '2025-09-30 22:17:28.298997', 'step': 4486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:28.329017', 'step': 4486, 'epoch': 1} {'type': 'loss', 'content': 0.1718224436044693, 'timestamp': '2025-09-30 22:17:28.331421', 'step': 4487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:28.362257', 'step': 4487, 'epoch': 1} {'type': 'loss', 'content': 0.10274387896060944, 'timestamp': '2025-09-30 22:17:28.385928', 'step': 4488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.416100', 'step': 4488, 'epoch': 1} {'type': 'loss', 'content': 0.14835762977600098, 'timestamp': '2025-09-30 22:17:28.419013', 'step': 4489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:28.449984', 'step': 4489, 'epoch': 1} {'type': 'loss', 'content': 0.09928198158740997, 'timestamp': '2025-09-30 22:17:28.452790', 'step': 4490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.483549', 'step': 4490, 'epoch': 1} {'type': 'loss', 'content': 0.18147219717502594, 'timestamp': '2025-09-30 22:17:28.485743', 'step': 4491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.515956', 'step': 4491, 'epoch': 1} {'type': 'loss', 'content': 0.13981519639492035, 'timestamp': '2025-09-30 22:17:28.539867', 'step': 4492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.569753', 'step': 4492, 'epoch': 1} {'type': 'loss', 'content': 0.14593957364559174, 'timestamp': '2025-09-30 22:17:28.572287', 'step': 4493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:28.602508', 'step': 4493, 'epoch': 1} {'type': 'loss', 'content': 0.10998982191085815, 'timestamp': '2025-09-30 22:17:28.604647', 'step': 4494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.635170', 'step': 4494, 'epoch': 1} {'type': 'loss', 'content': 0.11767151206731796, 'timestamp': '2025-09-30 22:17:28.637825', 'step': 4495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.668036', 'step': 4495, 'epoch': 1} {'type': 'loss', 'content': 0.15777164697647095, 'timestamp': '2025-09-30 22:17:28.691867', 'step': 4496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:28.721547', 'step': 4496, 'epoch': 1} {'type': 'loss', 'content': 0.27871623635292053, 'timestamp': '2025-09-30 22:17:28.723998', 'step': 4497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:28.754990', 'step': 4497, 'epoch': 1} {'type': 'loss', 'content': 0.14415015280246735, 'timestamp': '2025-09-30 22:17:28.757353', 'step': 4498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:28.789495', 'step': 4498, 'epoch': 1} {'type': 'loss', 'content': 0.20440645515918732, 'timestamp': '2025-09-30 22:17:28.792052', 'step': 4499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:28.821892', 'step': 4499, 'epoch': 1} {'type': 'loss', 'content': 0.10015613585710526, 'timestamp': '2025-09-30 22:17:28.845682', 'step': 4500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 4500', 'timestamp': '2025-09-30 22:17:33.681882', 'step': 4500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:33.714018', 'step': 4500, 'epoch': 1} {'type': 'loss', 'content': 0.1585000902414322, 'timestamp': '2025-09-30 22:17:33.724545', 'step': 4501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:33.761156', 'step': 4501, 'epoch': 1} {'type': 'loss', 'content': 0.09999693930149078, 'timestamp': '2025-09-30 22:17:33.773333', 'step': 4502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:33.811013', 'step': 4502, 'epoch': 1} {'type': 'loss', 'content': 0.2630082666873932, 'timestamp': '2025-09-30 22:17:33.822539', 'step': 4503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:33.866790', 'step': 4503, 'epoch': 1} {'type': 'loss', 'content': 0.16089457273483276, 'timestamp': '2025-09-30 22:17:33.899832', 'step': 4504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:33.936879', 'step': 4504, 'epoch': 1} {'type': 'loss', 'content': 0.27057087421417236, 'timestamp': '2025-09-30 22:17:33.947004', 'step': 4505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:33.985617', 'step': 4505, 'epoch': 1} {'type': 'loss', 'content': 0.09306669235229492, 'timestamp': '2025-09-30 22:17:33.987980', 'step': 4506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.022995', 'step': 4506, 'epoch': 1} {'type': 'loss', 'content': 0.2354700118303299, 'timestamp': '2025-09-30 22:17:34.025671', 'step': 4507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.062217', 'step': 4507, 'epoch': 1} {'type': 'loss', 'content': 0.20061145722866058, 'timestamp': '2025-09-30 22:17:34.093076', 'step': 4508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:34.126906', 'step': 4508, 'epoch': 1} {'type': 'loss', 'content': 0.18267060816287994, 'timestamp': '2025-09-30 22:17:34.130438', 'step': 4509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.166803', 'step': 4509, 'epoch': 1} {'type': 'loss', 'content': 0.17110872268676758, 'timestamp': '2025-09-30 22:17:34.172406', 'step': 4510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:34.206167', 'step': 4510, 'epoch': 1} {'type': 'loss', 'content': 0.16728997230529785, 'timestamp': '2025-09-30 22:17:34.214531', 'step': 4511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:34.245190', 'step': 4511, 'epoch': 1} {'type': 'loss', 'content': 0.19507311284542084, 'timestamp': '2025-09-30 22:17:34.277194', 'step': 4512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.307431', 'step': 4512, 'epoch': 1} {'type': 'loss', 'content': 0.18445837497711182, 'timestamp': '2025-09-30 22:17:34.310031', 'step': 4513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.346690', 'step': 4513, 'epoch': 1} {'type': 'loss', 'content': 0.21751441061496735, 'timestamp': '2025-09-30 22:17:34.349035', 'step': 4514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.379915', 'step': 4514, 'epoch': 1} {'type': 'loss', 'content': 0.16184470057487488, 'timestamp': '2025-09-30 22:17:34.385350', 'step': 4515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.416481', 'step': 4515, 'epoch': 1} {'type': 'loss', 'content': 0.13142293691635132, 'timestamp': '2025-09-30 22:17:34.440482', 'step': 4516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.471001', 'step': 4516, 'epoch': 1} {'type': 'loss', 'content': 0.11174378544092178, 'timestamp': '2025-09-30 22:17:34.473497', 'step': 4517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.504466', 'step': 4517, 'epoch': 1} {'type': 'loss', 'content': 0.174832284450531, 'timestamp': '2025-09-30 22:17:34.516665', 'step': 4518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.549865', 'step': 4518, 'epoch': 1} {'type': 'loss', 'content': 0.1369897872209549, 'timestamp': '2025-09-30 22:17:34.553639', 'step': 4519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:34.585093', 'step': 4519, 'epoch': 1} {'type': 'loss', 'content': 0.1572013795375824, 'timestamp': '2025-09-30 22:17:34.610479', 'step': 4520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:34.640952', 'step': 4520, 'epoch': 1} {'type': 'loss', 'content': 0.19817309081554413, 'timestamp': '2025-09-30 22:17:34.645333', 'step': 4521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.675675', 'step': 4521, 'epoch': 1} {'type': 'loss', 'content': 0.17982304096221924, 'timestamp': '2025-09-30 22:17:34.679072', 'step': 4522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.709028', 'step': 4522, 'epoch': 1} {'type': 'loss', 'content': 0.10760834068059921, 'timestamp': '2025-09-30 22:17:34.713427', 'step': 4523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:34.743089', 'step': 4523, 'epoch': 1} {'type': 'loss', 'content': 0.14712369441986084, 'timestamp': '2025-09-30 22:17:34.770919', 'step': 4524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:34.803172', 'step': 4524, 'epoch': 1} {'type': 'loss', 'content': 0.11899805068969727, 'timestamp': '2025-09-30 22:17:34.809331', 'step': 4525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:34.844599', 'step': 4525, 'epoch': 1} {'type': 'loss', 'content': 0.1825232058763504, 'timestamp': '2025-09-30 22:17:34.848762', 'step': 4526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:34.883536', 'step': 4526, 'epoch': 1} {'type': 'loss', 'content': 0.15795153379440308, 'timestamp': '2025-09-30 22:17:34.888398', 'step': 4527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:34.923667', 'step': 4527, 'epoch': 1} {'type': 'loss', 'content': 0.10884775221347809, 'timestamp': '2025-09-30 22:17:34.948493', 'step': 4528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:34.981444', 'step': 4528, 'epoch': 1} {'type': 'loss', 'content': 0.19069503247737885, 'timestamp': '2025-09-30 22:17:34.983747', 'step': 4529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:35.016401', 'step': 4529, 'epoch': 1} {'type': 'loss', 'content': 0.1402994990348816, 'timestamp': '2025-09-30 22:17:35.018999', 'step': 4530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:35.051153', 'step': 4530, 'epoch': 1} {'type': 'loss', 'content': 0.15346506237983704, 'timestamp': '2025-09-30 22:17:35.055630', 'step': 4531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:35.088000', 'step': 4531, 'epoch': 1} {'type': 'loss', 'content': 0.18109893798828125, 'timestamp': '2025-09-30 22:17:35.113154', 'step': 4532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:35.145373', 'step': 4532, 'epoch': 1} {'type': 'loss', 'content': 0.11941909790039062, 'timestamp': '2025-09-30 22:17:35.149517', 'step': 4533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:35.182522', 'step': 4533, 'epoch': 1} {'type': 'loss', 'content': 0.3294749855995178, 'timestamp': '2025-09-30 22:17:35.187431', 'step': 4534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.220102', 'step': 4534, 'epoch': 1} {'type': 'loss', 'content': 0.1522236317396164, 'timestamp': '2025-09-30 22:17:35.224660', 'step': 4535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:35.264342', 'step': 4535, 'epoch': 1} {'type': 'loss', 'content': 0.16532380878925323, 'timestamp': '2025-09-30 22:17:35.293698', 'step': 4536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.326514', 'step': 4536, 'epoch': 1} {'type': 'loss', 'content': 0.18622957170009613, 'timestamp': '2025-09-30 22:17:35.329003', 'step': 4537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:35.361707', 'step': 4537, 'epoch': 1} {'type': 'loss', 'content': 0.18293370306491852, 'timestamp': '2025-09-30 22:17:35.366278', 'step': 4538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.396856', 'step': 4538, 'epoch': 1} {'type': 'loss', 'content': 0.13598844408988953, 'timestamp': '2025-09-30 22:17:35.401376', 'step': 4539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:35.433831', 'step': 4539, 'epoch': 1} {'type': 'loss', 'content': 0.1736210435628891, 'timestamp': '2025-09-30 22:17:35.457832', 'step': 4540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:35.488640', 'step': 4540, 'epoch': 1} {'type': 'loss', 'content': 0.18988631665706635, 'timestamp': '2025-09-30 22:17:35.491336', 'step': 4541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:17:35.524898', 'step': 4541, 'epoch': 1} {'type': 'loss', 'content': 0.11721827834844589, 'timestamp': '2025-09-30 22:17:35.530112', 'step': 4542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.560723', 'step': 4542, 'epoch': 1} {'type': 'loss', 'content': 0.24063292145729065, 'timestamp': '2025-09-30 22:17:35.564857', 'step': 4543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.596006', 'step': 4543, 'epoch': 1} {'type': 'loss', 'content': 0.08296824991703033, 'timestamp': '2025-09-30 22:17:35.621588', 'step': 4544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.654750', 'step': 4544, 'epoch': 1} {'type': 'loss', 'content': 0.26909366250038147, 'timestamp': '2025-09-30 22:17:35.662118', 'step': 4545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.696230', 'step': 4545, 'epoch': 1} {'type': 'loss', 'content': 0.09713710099458694, 'timestamp': '2025-09-30 22:17:35.698895', 'step': 4546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.730590', 'step': 4546, 'epoch': 1} {'type': 'loss', 'content': 0.13580425083637238, 'timestamp': '2025-09-30 22:17:35.736538', 'step': 4547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:35.770452', 'step': 4547, 'epoch': 1} {'type': 'loss', 'content': 0.1724022626876831, 'timestamp': '2025-09-30 22:17:35.798505', 'step': 4548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:35.833621', 'step': 4548, 'epoch': 1} {'type': 'loss', 'content': 0.14068558812141418, 'timestamp': '2025-09-30 22:17:35.839757', 'step': 4549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:35.874238', 'step': 4549, 'epoch': 1} {'type': 'loss', 'content': 0.2211872935295105, 'timestamp': '2025-09-30 22:17:35.878285', 'step': 4550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:35.912510', 'step': 4550, 'epoch': 1} {'type': 'loss', 'content': 0.0495273731648922, 'timestamp': '2025-09-30 22:17:35.915261', 'step': 4551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:35.946761', 'step': 4551, 'epoch': 1} {'type': 'loss', 'content': 0.13595342636108398, 'timestamp': '2025-09-30 22:17:35.975442', 'step': 4552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.008523', 'step': 4552, 'epoch': 1} {'type': 'loss', 'content': 0.15017037093639374, 'timestamp': '2025-09-30 22:17:36.012601', 'step': 4553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.044750', 'step': 4553, 'epoch': 1} {'type': 'loss', 'content': 0.15854428708553314, 'timestamp': '2025-09-30 22:17:36.051646', 'step': 4554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.085799', 'step': 4554, 'epoch': 1} {'type': 'loss', 'content': 0.16614492237567902, 'timestamp': '2025-09-30 22:17:36.090226', 'step': 4555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:36.123631', 'step': 4555, 'epoch': 1} {'type': 'loss', 'content': 0.07389471679925919, 'timestamp': '2025-09-30 22:17:36.152402', 'step': 4556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.186327', 'step': 4556, 'epoch': 1} {'type': 'loss', 'content': 0.13441739976406097, 'timestamp': '2025-09-30 22:17:36.188689', 'step': 4557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:36.221042', 'step': 4557, 'epoch': 1} {'type': 'loss', 'content': 0.11033140867948532, 'timestamp': '2025-09-30 22:17:36.226844', 'step': 4558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:36.261411', 'step': 4558, 'epoch': 1} {'type': 'loss', 'content': 0.11744341254234314, 'timestamp': '2025-09-30 22:17:36.264135', 'step': 4559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.294646', 'step': 4559, 'epoch': 1} {'type': 'loss', 'content': 0.14150375127792358, 'timestamp': '2025-09-30 22:17:36.322731', 'step': 4560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.353326', 'step': 4560, 'epoch': 1} {'type': 'loss', 'content': 0.1180030032992363, 'timestamp': '2025-09-30 22:17:36.356116', 'step': 4561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.389930', 'step': 4561, 'epoch': 1} {'type': 'loss', 'content': 0.13896168768405914, 'timestamp': '2025-09-30 22:17:36.393553', 'step': 4562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.424611', 'step': 4562, 'epoch': 1} {'type': 'loss', 'content': 0.11069682985544205, 'timestamp': '2025-09-30 22:17:36.427713', 'step': 4563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.457746', 'step': 4563, 'epoch': 1} {'type': 'loss', 'content': 0.16330744326114655, 'timestamp': '2025-09-30 22:17:36.485468', 'step': 4564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.515320', 'step': 4564, 'epoch': 1} {'type': 'loss', 'content': 0.12096607685089111, 'timestamp': '2025-09-30 22:17:36.517892', 'step': 4565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.547697', 'step': 4565, 'epoch': 1} {'type': 'loss', 'content': 0.0922885313630104, 'timestamp': '2025-09-30 22:17:36.550282', 'step': 4566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.580921', 'step': 4566, 'epoch': 1} {'type': 'loss', 'content': 0.18182145059108734, 'timestamp': '2025-09-30 22:17:36.583578', 'step': 4567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.614681', 'step': 4567, 'epoch': 1} {'type': 'loss', 'content': 0.125333771109581, 'timestamp': '2025-09-30 22:17:36.642008', 'step': 4568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.671732', 'step': 4568, 'epoch': 1} {'type': 'loss', 'content': 0.18945437669754028, 'timestamp': '2025-09-30 22:17:36.679996', 'step': 4569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.713345', 'step': 4569, 'epoch': 1} {'type': 'loss', 'content': 0.24517743289470673, 'timestamp': '2025-09-30 22:17:36.716059', 'step': 4570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.746274', 'step': 4570, 'epoch': 1} {'type': 'loss', 'content': 0.1433468908071518, 'timestamp': '2025-09-30 22:17:36.749505', 'step': 4571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:36.780504', 'step': 4571, 'epoch': 1} {'type': 'loss', 'content': 0.171242356300354, 'timestamp': '2025-09-30 22:17:36.805226', 'step': 4572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:36.835487', 'step': 4572, 'epoch': 1} {'type': 'loss', 'content': 0.19249099493026733, 'timestamp': '2025-09-30 22:17:36.838474', 'step': 4573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:36.869215', 'step': 4573, 'epoch': 1} {'type': 'loss', 'content': 0.09348714351654053, 'timestamp': '2025-09-30 22:17:36.877434', 'step': 4574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.913013', 'step': 4574, 'epoch': 1} {'type': 'loss', 'content': 0.15587389469146729, 'timestamp': '2025-09-30 22:17:36.920282', 'step': 4575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:36.956934', 'step': 4575, 'epoch': 1} {'type': 'loss', 'content': 0.16906537115573883, 'timestamp': '2025-09-30 22:17:36.983141', 'step': 4576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:37.019515', 'step': 4576, 'epoch': 1} {'type': 'loss', 'content': 0.18411785364151, 'timestamp': '2025-09-30 22:17:37.023245', 'step': 4577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:37.054453', 'step': 4577, 'epoch': 1} {'type': 'loss', 'content': 0.1624147593975067, 'timestamp': '2025-09-30 22:17:37.057014', 'step': 4578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:37.101449', 'step': 4578, 'epoch': 1} {'type': 'loss', 'content': 0.0939546525478363, 'timestamp': '2025-09-30 22:17:37.104346', 'step': 4579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:37.135485', 'step': 4579, 'epoch': 1} {'type': 'loss', 'content': 0.13577039539813995, 'timestamp': '2025-09-30 22:17:37.160129', 'step': 4580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:37.190571', 'step': 4580, 'epoch': 1} {'type': 'loss', 'content': 0.1525309532880783, 'timestamp': '2025-09-30 22:17:37.196683', 'step': 4581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:37.228691', 'step': 4581, 'epoch': 1} {'type': 'loss', 'content': 0.1580866426229477, 'timestamp': '2025-09-30 22:17:37.231056', 'step': 4582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:37.265025', 'step': 4582, 'epoch': 1} {'type': 'loss', 'content': 0.23718537390232086, 'timestamp': '2025-09-30 22:17:37.267589', 'step': 4583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.299546', 'step': 4583, 'epoch': 1} {'type': 'loss', 'content': 0.09739534556865692, 'timestamp': '2025-09-30 22:17:37.324133', 'step': 4584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:37.354878', 'step': 4584, 'epoch': 1} {'type': 'loss', 'content': 0.1860230416059494, 'timestamp': '2025-09-30 22:17:37.357239', 'step': 4585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:37.387367', 'step': 4585, 'epoch': 1} {'type': 'loss', 'content': 0.11723649501800537, 'timestamp': '2025-09-30 22:17:37.389860', 'step': 4586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:37.425915', 'step': 4586, 'epoch': 1} {'type': 'loss', 'content': 0.11272063851356506, 'timestamp': '2025-09-30 22:17:37.428083', 'step': 4587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:37.458908', 'step': 4587, 'epoch': 1} {'type': 'loss', 'content': 0.17920775711536407, 'timestamp': '2025-09-30 22:17:37.488011', 'step': 4588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:37.518025', 'step': 4588, 'epoch': 1} {'type': 'loss', 'content': 0.12917423248291016, 'timestamp': '2025-09-30 22:17:37.520445', 'step': 4589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.551900', 'step': 4589, 'epoch': 1} {'type': 'loss', 'content': 0.10457807034254074, 'timestamp': '2025-09-30 22:17:37.555338', 'step': 4590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:37.586098', 'step': 4590, 'epoch': 1} {'type': 'loss', 'content': 0.1306440830230713, 'timestamp': '2025-09-30 22:17:37.588938', 'step': 4591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:17:37.623998', 'step': 4591, 'epoch': 1} {'type': 'loss', 'content': 0.1714533269405365, 'timestamp': '2025-09-30 22:17:37.651351', 'step': 4592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:37.682643', 'step': 4592, 'epoch': 1} {'type': 'loss', 'content': 0.13592493534088135, 'timestamp': '2025-09-30 22:17:37.685248', 'step': 4593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.720396', 'step': 4593, 'epoch': 1} {'type': 'loss', 'content': 0.22501808404922485, 'timestamp': '2025-09-30 22:17:37.723151', 'step': 4594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:37.753427', 'step': 4594, 'epoch': 1} {'type': 'loss', 'content': 0.185696542263031, 'timestamp': '2025-09-30 22:17:37.756116', 'step': 4595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.787127', 'step': 4595, 'epoch': 1} {'type': 'loss', 'content': 0.11756899952888489, 'timestamp': '2025-09-30 22:17:37.815845', 'step': 4596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.845985', 'step': 4596, 'epoch': 1} {'type': 'loss', 'content': 0.17196187376976013, 'timestamp': '2025-09-30 22:17:37.849261', 'step': 4597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.879390', 'step': 4597, 'epoch': 1} {'type': 'loss', 'content': 0.09094501286745071, 'timestamp': '2025-09-30 22:17:37.884039', 'step': 4598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:37.922088', 'step': 4598, 'epoch': 1} {'type': 'loss', 'content': 0.14922954142093658, 'timestamp': '2025-09-30 22:17:37.924542', 'step': 4599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:37.955712', 'step': 4599, 'epoch': 1} {'type': 'loss', 'content': 0.18436172604560852, 'timestamp': '2025-09-30 22:17:37.987125', 'step': 4600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.025087', 'step': 4600, 'epoch': 1} {'type': 'loss', 'content': 0.22148877382278442, 'timestamp': '2025-09-30 22:17:38.028876', 'step': 4601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:38.061741', 'step': 4601, 'epoch': 1} {'type': 'loss', 'content': 0.10642290860414505, 'timestamp': '2025-09-30 22:17:38.065952', 'step': 4602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:38.102468', 'step': 4602, 'epoch': 1} {'type': 'loss', 'content': 0.19104702770709991, 'timestamp': '2025-09-30 22:17:38.108589', 'step': 4603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:38.161702', 'step': 4603, 'epoch': 1} {'type': 'loss', 'content': 0.13430243730545044, 'timestamp': '2025-09-30 22:17:38.188751', 'step': 4604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.222502', 'step': 4604, 'epoch': 1} {'type': 'loss', 'content': 0.14649781584739685, 'timestamp': '2025-09-30 22:17:38.225976', 'step': 4605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.256835', 'step': 4605, 'epoch': 1} {'type': 'loss', 'content': 0.14801079034805298, 'timestamp': '2025-09-30 22:17:38.263620', 'step': 4606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:38.298845', 'step': 4606, 'epoch': 1} {'type': 'loss', 'content': 0.12484288960695267, 'timestamp': '2025-09-30 22:17:38.306949', 'step': 4607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.342097', 'step': 4607, 'epoch': 1} {'type': 'loss', 'content': 0.1439277082681656, 'timestamp': '2025-09-30 22:17:38.369257', 'step': 4608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.400055', 'step': 4608, 'epoch': 1} {'type': 'loss', 'content': 0.13812674582004547, 'timestamp': '2025-09-30 22:17:38.402637', 'step': 4609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:38.439563', 'step': 4609, 'epoch': 1} {'type': 'loss', 'content': 0.1684415489435196, 'timestamp': '2025-09-30 22:17:38.442693', 'step': 4610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.476361', 'step': 4610, 'epoch': 1} {'type': 'loss', 'content': 0.1746217906475067, 'timestamp': '2025-09-30 22:17:38.479401', 'step': 4611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.509732', 'step': 4611, 'epoch': 1} {'type': 'loss', 'content': 0.11091298609972, 'timestamp': '2025-09-30 22:17:38.537943', 'step': 4612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.567766', 'step': 4612, 'epoch': 1} {'type': 'loss', 'content': 0.2891596853733063, 'timestamp': '2025-09-30 22:17:38.570754', 'step': 4613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.601934', 'step': 4613, 'epoch': 1} {'type': 'loss', 'content': 0.14346636831760406, 'timestamp': '2025-09-30 22:17:38.604534', 'step': 4614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:38.634961', 'step': 4614, 'epoch': 1} {'type': 'loss', 'content': 0.10558392107486725, 'timestamp': '2025-09-30 22:17:38.637776', 'step': 4615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:38.668002', 'step': 4615, 'epoch': 1} {'type': 'loss', 'content': 0.14694571495056152, 'timestamp': '2025-09-30 22:17:38.697106', 'step': 4616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:38.729084', 'step': 4616, 'epoch': 1} {'type': 'loss', 'content': 0.11886422336101532, 'timestamp': '2025-09-30 22:17:38.736775', 'step': 4617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:38.773410', 'step': 4617, 'epoch': 1} {'type': 'loss', 'content': 0.1270872801542282, 'timestamp': '2025-09-30 22:17:38.776193', 'step': 4618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:38.806309', 'step': 4618, 'epoch': 1} {'type': 'loss', 'content': 0.14548054337501526, 'timestamp': '2025-09-30 22:17:38.812479', 'step': 4619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.850797', 'step': 4619, 'epoch': 1} {'type': 'loss', 'content': 0.2207992523908615, 'timestamp': '2025-09-30 22:17:38.881912', 'step': 4620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:38.919055', 'step': 4620, 'epoch': 1} {'type': 'loss', 'content': 0.1523256003856659, 'timestamp': '2025-09-30 22:17:38.922808', 'step': 4621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:38.970079', 'step': 4621, 'epoch': 1} {'type': 'loss', 'content': 0.14632345736026764, 'timestamp': '2025-09-30 22:17:38.974008', 'step': 4622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:39.011377', 'step': 4622, 'epoch': 1} {'type': 'loss', 'content': 0.1126515343785286, 'timestamp': '2025-09-30 22:17:39.015300', 'step': 4623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:39.063699', 'step': 4623, 'epoch': 1} {'type': 'loss', 'content': 0.1644764393568039, 'timestamp': '2025-09-30 22:17:39.091985', 'step': 4624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.127692', 'step': 4624, 'epoch': 1} {'type': 'loss', 'content': 0.3800862729549408, 'timestamp': '2025-09-30 22:17:39.131386', 'step': 4625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:39.171835', 'step': 4625, 'epoch': 1} {'type': 'loss', 'content': 0.21543487906455994, 'timestamp': '2025-09-30 22:17:39.178039', 'step': 4626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:39.211135', 'step': 4626, 'epoch': 1} {'type': 'loss', 'content': 0.19765213131904602, 'timestamp': '2025-09-30 22:17:39.218769', 'step': 4627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:39.257090', 'step': 4627, 'epoch': 1} {'type': 'loss', 'content': 0.088409423828125, 'timestamp': '2025-09-30 22:17:39.281904', 'step': 4628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.313096', 'step': 4628, 'epoch': 1} {'type': 'loss', 'content': 0.19087180495262146, 'timestamp': '2025-09-30 22:17:39.320504', 'step': 4629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:39.356539', 'step': 4629, 'epoch': 1} {'type': 'loss', 'content': 0.1564861685037613, 'timestamp': '2025-09-30 22:17:39.359096', 'step': 4630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.394763', 'step': 4630, 'epoch': 1} {'type': 'loss', 'content': 0.11050854623317719, 'timestamp': '2025-09-30 22:17:39.403699', 'step': 4631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.434398', 'step': 4631, 'epoch': 1} {'type': 'loss', 'content': 0.16870102286338806, 'timestamp': '2025-09-30 22:17:39.459281', 'step': 4632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:39.503387', 'step': 4632, 'epoch': 1} {'type': 'loss', 'content': 0.17401479184627533, 'timestamp': '2025-09-30 22:17:39.511150', 'step': 4633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.544817', 'step': 4633, 'epoch': 1} {'type': 'loss', 'content': 0.12510724365711212, 'timestamp': '2025-09-30 22:17:39.549041', 'step': 4634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:39.579639', 'step': 4634, 'epoch': 1} {'type': 'loss', 'content': 0.13548627495765686, 'timestamp': '2025-09-30 22:17:39.584274', 'step': 4635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:39.615043', 'step': 4635, 'epoch': 1} {'type': 'loss', 'content': 0.14419569075107574, 'timestamp': '2025-09-30 22:17:39.639953', 'step': 4636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:39.669517', 'step': 4636, 'epoch': 1} {'type': 'loss', 'content': 0.2134476900100708, 'timestamp': '2025-09-30 22:17:39.671915', 'step': 4637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.701894', 'step': 4637, 'epoch': 1} {'type': 'loss', 'content': 0.1876639425754547, 'timestamp': '2025-09-30 22:17:39.704656', 'step': 4638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.740352', 'step': 4638, 'epoch': 1} {'type': 'loss', 'content': 0.27081483602523804, 'timestamp': '2025-09-30 22:17:39.747685', 'step': 4639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:39.784687', 'step': 4639, 'epoch': 1} {'type': 'loss', 'content': 0.22056449949741364, 'timestamp': '2025-09-30 22:17:39.809831', 'step': 4640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.843180', 'step': 4640, 'epoch': 1} {'type': 'loss', 'content': 0.14643651247024536, 'timestamp': '2025-09-30 22:17:39.847220', 'step': 4641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:39.878664', 'step': 4641, 'epoch': 1} {'type': 'loss', 'content': 0.14508548378944397, 'timestamp': '2025-09-30 22:17:39.894952', 'step': 4642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:39.932027', 'step': 4642, 'epoch': 1} {'type': 'loss', 'content': 0.08561258018016815, 'timestamp': '2025-09-30 22:17:39.934698', 'step': 4643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:39.966670', 'step': 4643, 'epoch': 1} {'type': 'loss', 'content': 0.21488308906555176, 'timestamp': '2025-09-30 22:17:39.998035', 'step': 4644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:40.029132', 'step': 4644, 'epoch': 1} {'type': 'loss', 'content': 0.18183554708957672, 'timestamp': '2025-09-30 22:17:40.038963', 'step': 4645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:40.070496', 'step': 4645, 'epoch': 1} {'type': 'loss', 'content': 0.13370342552661896, 'timestamp': '2025-09-30 22:17:40.078694', 'step': 4646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:40.109811', 'step': 4646, 'epoch': 1} {'type': 'loss', 'content': 0.16276735067367554, 'timestamp': '2025-09-30 22:17:40.117072', 'step': 4647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:40.150118', 'step': 4647, 'epoch': 1} {'type': 'loss', 'content': 0.1238747239112854, 'timestamp': '2025-09-30 22:17:40.177835', 'step': 4648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:40.211772', 'step': 4648, 'epoch': 1} {'type': 'loss', 'content': 0.18286564946174622, 'timestamp': '2025-09-30 22:17:40.217917', 'step': 4649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:40.252081', 'step': 4649, 'epoch': 1} {'type': 'loss', 'content': 0.16607055068016052, 'timestamp': '2025-09-30 22:17:40.255144', 'step': 4650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:40.290391', 'step': 4650, 'epoch': 1} {'type': 'loss', 'content': 0.24490559101104736, 'timestamp': '2025-09-30 22:17:40.296646', 'step': 4651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:40.330629', 'step': 4651, 'epoch': 1} {'type': 'loss', 'content': 0.10408192127943039, 'timestamp': '2025-09-30 22:17:40.355167', 'step': 4652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:40.388695', 'step': 4652, 'epoch': 1} {'type': 'loss', 'content': 0.15798990428447723, 'timestamp': '2025-09-30 22:17:40.393815', 'step': 4653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:40.427329', 'step': 4653, 'epoch': 1} {'type': 'loss', 'content': 0.2737422585487366, 'timestamp': '2025-09-30 22:17:40.432254', 'step': 4654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:40.464102', 'step': 4654, 'epoch': 1} {'type': 'loss', 'content': 0.16275988519191742, 'timestamp': '2025-09-30 22:17:40.469836', 'step': 4655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:40.502567', 'step': 4655, 'epoch': 1} {'type': 'loss', 'content': 0.1314413994550705, 'timestamp': '2025-09-30 22:17:40.528795', 'step': 4656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:40.560784', 'step': 4656, 'epoch': 1} {'type': 'loss', 'content': 0.17196433246135712, 'timestamp': '2025-09-30 22:17:40.565148', 'step': 4657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:40.596622', 'step': 4657, 'epoch': 1} {'type': 'loss', 'content': 0.18536153435707092, 'timestamp': '2025-09-30 22:17:40.602332', 'step': 4658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:40.632520', 'step': 4658, 'epoch': 1} {'type': 'loss', 'content': 0.13822321593761444, 'timestamp': '2025-09-30 22:17:40.639593', 'step': 4659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:40.672854', 'step': 4659, 'epoch': 1} {'type': 'loss', 'content': 0.21933385729789734, 'timestamp': '2025-09-30 22:17:40.696815', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:17:48.281166', 'step': 4660, 'epoch': 1} {'type': 'pplx', 'content': 8894.589911146855, 'timestamp': '2025-09-30 22:17:48.285814', 'step': 4660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.316154', 'step': 4660, 'epoch': 1} {'type': 'loss', 'content': 0.13767360150814056, 'timestamp': '2025-09-30 22:17:48.327475', 'step': 4661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.365925', 'step': 4661, 'epoch': 1} {'type': 'loss', 'content': 0.16465699672698975, 'timestamp': '2025-09-30 22:17:48.369178', 'step': 4662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:48.400193', 'step': 4662, 'epoch': 1} {'type': 'loss', 'content': 0.14655515551567078, 'timestamp': '2025-09-30 22:17:48.411508', 'step': 4663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.442299', 'step': 4663, 'epoch': 1} {'type': 'loss', 'content': 0.1464594602584839, 'timestamp': '2025-09-30 22:17:48.475471', 'step': 4664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.507054', 'step': 4664, 'epoch': 1} {'type': 'loss', 'content': 0.09756205230951309, 'timestamp': '2025-09-30 22:17:48.509853', 'step': 4665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.543931', 'step': 4665, 'epoch': 1} {'type': 'loss', 'content': 0.2078457474708557, 'timestamp': '2025-09-30 22:17:48.554457', 'step': 4666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:48.594287', 'step': 4666, 'epoch': 1} {'type': 'loss', 'content': 0.1277424842119217, 'timestamp': '2025-09-30 22:17:48.598291', 'step': 4667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.631702', 'step': 4667, 'epoch': 1} {'type': 'loss', 'content': 0.22201809287071228, 'timestamp': '2025-09-30 22:17:48.663216', 'step': 4668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:48.702812', 'step': 4668, 'epoch': 1} {'type': 'loss', 'content': 0.1493125706911087, 'timestamp': '2025-09-30 22:17:48.711287', 'step': 4669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:48.752757', 'step': 4669, 'epoch': 1} {'type': 'loss', 'content': 0.20845559239387512, 'timestamp': '2025-09-30 22:17:48.756118', 'step': 4670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.794830', 'step': 4670, 'epoch': 1} {'type': 'loss', 'content': 0.1953592449426651, 'timestamp': '2025-09-30 22:17:48.799228', 'step': 4671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:48.831631', 'step': 4671, 'epoch': 1} {'type': 'loss', 'content': 0.1752881407737732, 'timestamp': '2025-09-30 22:17:48.856361', 'step': 4672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:48.887219', 'step': 4672, 'epoch': 1} {'type': 'loss', 'content': 0.14589641988277435, 'timestamp': '2025-09-30 22:17:48.901193', 'step': 4673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:48.934040', 'step': 4673, 'epoch': 1} {'type': 'loss', 'content': 0.14752016961574554, 'timestamp': '2025-09-30 22:17:48.944152', 'step': 4674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:48.982760', 'step': 4674, 'epoch': 1} {'type': 'loss', 'content': 0.17918401956558228, 'timestamp': '2025-09-30 22:17:48.985451', 'step': 4675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.017482', 'step': 4675, 'epoch': 1} {'type': 'loss', 'content': 0.177066832780838, 'timestamp': '2025-09-30 22:17:49.051071', 'step': 4676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:49.081944', 'step': 4676, 'epoch': 1} {'type': 'loss', 'content': 0.2130919098854065, 'timestamp': '2025-09-30 22:17:49.085129', 'step': 4677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.124265', 'step': 4677, 'epoch': 1} {'type': 'loss', 'content': 0.13057617843151093, 'timestamp': '2025-09-30 22:17:49.127922', 'step': 4678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.159261', 'step': 4678, 'epoch': 1} {'type': 'loss', 'content': 0.20045599341392517, 'timestamp': '2025-09-30 22:17:49.162731', 'step': 4679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:49.194425', 'step': 4679, 'epoch': 1} {'type': 'loss', 'content': 0.16755075752735138, 'timestamp': '2025-09-30 22:17:49.227103', 'step': 4680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:49.271400', 'step': 4680, 'epoch': 1} {'type': 'loss', 'content': 0.11651822179555893, 'timestamp': '2025-09-30 22:17:49.274575', 'step': 4681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.308083', 'step': 4681, 'epoch': 1} {'type': 'loss', 'content': 0.19431978464126587, 'timestamp': '2025-09-30 22:17:49.318591', 'step': 4682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:49.364688', 'step': 4682, 'epoch': 1} {'type': 'loss', 'content': 0.09549449384212494, 'timestamp': '2025-09-30 22:17:49.368002', 'step': 4683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:49.407870', 'step': 4683, 'epoch': 1} {'type': 'loss', 'content': 0.21731120347976685, 'timestamp': '2025-09-30 22:17:49.441557', 'step': 4684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:49.498701', 'step': 4684, 'epoch': 1} {'type': 'loss', 'content': 0.16856548190116882, 'timestamp': '2025-09-30 22:17:49.509356', 'step': 4685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.555396', 'step': 4685, 'epoch': 1} {'type': 'loss', 'content': 0.18555693328380585, 'timestamp': '2025-09-30 22:17:49.558087', 'step': 4686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:49.590956', 'step': 4686, 'epoch': 1} {'type': 'loss', 'content': 0.1565079241991043, 'timestamp': '2025-09-30 22:17:49.594147', 'step': 4687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.628981', 'step': 4687, 'epoch': 1} {'type': 'loss', 'content': 0.10559253394603729, 'timestamp': '2025-09-30 22:17:49.664904', 'step': 4688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:49.710538', 'step': 4688, 'epoch': 1} {'type': 'loss', 'content': 0.046787701547145844, 'timestamp': '2025-09-30 22:17:49.713396', 'step': 4689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:49.757159', 'step': 4689, 'epoch': 1} {'type': 'loss', 'content': 0.09014301002025604, 'timestamp': '2025-09-30 22:17:49.769529', 'step': 4690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:49.813900', 'step': 4690, 'epoch': 1} {'type': 'loss', 'content': 0.17615818977355957, 'timestamp': '2025-09-30 22:17:49.825815', 'step': 4691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:49.874948', 'step': 4691, 'epoch': 1} {'type': 'loss', 'content': 0.08940696716308594, 'timestamp': '2025-09-30 22:17:49.907257', 'step': 4692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:49.950380', 'step': 4692, 'epoch': 1} {'type': 'loss', 'content': 0.10720673203468323, 'timestamp': '2025-09-30 22:17:49.952963', 'step': 4693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.012006', 'step': 4693, 'epoch': 1} {'type': 'loss', 'content': 0.17612361907958984, 'timestamp': '2025-09-30 22:17:50.017515', 'step': 4694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.058576', 'step': 4694, 'epoch': 1} {'type': 'loss', 'content': 0.15715773403644562, 'timestamp': '2025-09-30 22:17:50.062021', 'step': 4695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:50.101479', 'step': 4695, 'epoch': 1} {'type': 'loss', 'content': 0.1390732377767563, 'timestamp': '2025-09-30 22:17:50.126428', 'step': 4696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:50.171196', 'step': 4696, 'epoch': 1} {'type': 'loss', 'content': 0.09077180922031403, 'timestamp': '2025-09-30 22:17:50.177703', 'step': 4697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:50.211129', 'step': 4697, 'epoch': 1} {'type': 'loss', 'content': 0.23121558129787445, 'timestamp': '2025-09-30 22:17:50.222629', 'step': 4698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:50.254820', 'step': 4698, 'epoch': 1} {'type': 'loss', 'content': 0.14919506013393402, 'timestamp': '2025-09-30 22:17:50.260255', 'step': 4699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.305187', 'step': 4699, 'epoch': 1} {'type': 'loss', 'content': 0.25019732117652893, 'timestamp': '2025-09-30 22:17:50.339420', 'step': 4700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:50.373820', 'step': 4700, 'epoch': 1} {'type': 'loss', 'content': 0.14765490591526031, 'timestamp': '2025-09-30 22:17:50.376367', 'step': 4701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.427088', 'step': 4701, 'epoch': 1} {'type': 'loss', 'content': 0.12454604357481003, 'timestamp': '2025-09-30 22:17:50.431189', 'step': 4702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:50.478617', 'step': 4702, 'epoch': 1} {'type': 'loss', 'content': 0.1106734350323677, 'timestamp': '2025-09-30 22:17:50.484359', 'step': 4703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:50.531860', 'step': 4703, 'epoch': 1} {'type': 'loss', 'content': 0.19658370316028595, 'timestamp': '2025-09-30 22:17:50.558014', 'step': 4704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.620317', 'step': 4704, 'epoch': 1} {'type': 'loss', 'content': 0.09798906743526459, 'timestamp': '2025-09-30 22:17:50.624107', 'step': 4705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:50.660332', 'step': 4705, 'epoch': 1} {'type': 'loss', 'content': 0.11539477854967117, 'timestamp': '2025-09-30 22:17:50.664108', 'step': 4706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.719248', 'step': 4706, 'epoch': 1} {'type': 'loss', 'content': 0.15169444680213928, 'timestamp': '2025-09-30 22:17:50.730624', 'step': 4707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:50.773401', 'step': 4707, 'epoch': 1} {'type': 'loss', 'content': 0.1550281047821045, 'timestamp': '2025-09-30 22:17:50.799307', 'step': 4708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:50.848313', 'step': 4708, 'epoch': 1} {'type': 'loss', 'content': 0.12581568956375122, 'timestamp': '2025-09-30 22:17:50.865444', 'step': 4709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.905181', 'step': 4709, 'epoch': 1} {'type': 'loss', 'content': 0.14772170782089233, 'timestamp': '2025-09-30 22:17:50.910013', 'step': 4710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:50.947590', 'step': 4710, 'epoch': 1} {'type': 'loss', 'content': 0.16949892044067383, 'timestamp': '2025-09-30 22:17:50.951124', 'step': 4711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:50.982598', 'step': 4711, 'epoch': 1} {'type': 'loss', 'content': 0.15076547861099243, 'timestamp': '2025-09-30 22:17:51.007437', 'step': 4712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.045443', 'step': 4712, 'epoch': 1} {'type': 'loss', 'content': 0.17720237374305725, 'timestamp': '2025-09-30 22:17:51.057569', 'step': 4713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:51.098321', 'step': 4713, 'epoch': 1} {'type': 'loss', 'content': 0.40466538071632385, 'timestamp': '2025-09-30 22:17:51.109024', 'step': 4714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:51.140741', 'step': 4714, 'epoch': 1} {'type': 'loss', 'content': 0.24376192688941956, 'timestamp': '2025-09-30 22:17:51.158387', 'step': 4715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.190028', 'step': 4715, 'epoch': 1} {'type': 'loss', 'content': 0.17072351276874542, 'timestamp': '2025-09-30 22:17:51.215778', 'step': 4716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.250099', 'step': 4716, 'epoch': 1} {'type': 'loss', 'content': 0.17304284870624542, 'timestamp': '2025-09-30 22:17:51.255524', 'step': 4717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.297711', 'step': 4717, 'epoch': 1} {'type': 'loss', 'content': 0.11366588622331619, 'timestamp': '2025-09-30 22:17:51.311848', 'step': 4718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:51.344173', 'step': 4718, 'epoch': 1} {'type': 'loss', 'content': 0.1544954627752304, 'timestamp': '2025-09-30 22:17:51.347453', 'step': 4719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:51.385529', 'step': 4719, 'epoch': 1} {'type': 'loss', 'content': 0.06533054262399673, 'timestamp': '2025-09-30 22:17:51.410927', 'step': 4720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.441792', 'step': 4720, 'epoch': 1} {'type': 'loss', 'content': 0.19968651235103607, 'timestamp': '2025-09-30 22:17:51.445667', 'step': 4721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:51.476862', 'step': 4721, 'epoch': 1} {'type': 'loss', 'content': 0.11520197242498398, 'timestamp': '2025-09-30 22:17:51.488754', 'step': 4722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.520287', 'step': 4722, 'epoch': 1} {'type': 'loss', 'content': 0.17385432124137878, 'timestamp': '2025-09-30 22:17:51.524280', 'step': 4723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:51.556268', 'step': 4723, 'epoch': 1} {'type': 'loss', 'content': 0.12503919005393982, 'timestamp': '2025-09-30 22:17:51.592714', 'step': 4724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.626190', 'step': 4724, 'epoch': 1} {'type': 'loss', 'content': 0.11861790716648102, 'timestamp': '2025-09-30 22:17:51.631229', 'step': 4725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:51.670607', 'step': 4725, 'epoch': 1} {'type': 'loss', 'content': 0.2917313873767853, 'timestamp': '2025-09-30 22:17:51.673843', 'step': 4726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.706993', 'step': 4726, 'epoch': 1} {'type': 'loss', 'content': 0.2397080957889557, 'timestamp': '2025-09-30 22:17:51.717192', 'step': 4727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:51.763396', 'step': 4727, 'epoch': 1} {'type': 'loss', 'content': 0.2315400093793869, 'timestamp': '2025-09-30 22:17:51.789145', 'step': 4728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:51.821583', 'step': 4728, 'epoch': 1} {'type': 'loss', 'content': 0.06749480962753296, 'timestamp': '2025-09-30 22:17:51.825455', 'step': 4729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:51.858270', 'step': 4729, 'epoch': 1} {'type': 'loss', 'content': 0.1216631606221199, 'timestamp': '2025-09-30 22:17:51.861251', 'step': 4730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:51.896025', 'step': 4730, 'epoch': 1} {'type': 'loss', 'content': 0.1520160436630249, 'timestamp': '2025-09-30 22:17:51.909251', 'step': 4731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:51.942201', 'step': 4731, 'epoch': 1} {'type': 'loss', 'content': 0.17173661291599274, 'timestamp': '2025-09-30 22:17:51.969823', 'step': 4732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:52.013455', 'step': 4732, 'epoch': 1} {'type': 'loss', 'content': 0.1464739292860031, 'timestamp': '2025-09-30 22:17:52.027393', 'step': 4733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.060891', 'step': 4733, 'epoch': 1} {'type': 'loss', 'content': 0.16003984212875366, 'timestamp': '2025-09-30 22:17:52.063858', 'step': 4734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:52.113685', 'step': 4734, 'epoch': 1} {'type': 'loss', 'content': 0.23887209594249725, 'timestamp': '2025-09-30 22:17:52.117230', 'step': 4735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.151074', 'step': 4735, 'epoch': 1} {'type': 'loss', 'content': 0.1507623940706253, 'timestamp': '2025-09-30 22:17:52.175846', 'step': 4736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.217207', 'step': 4736, 'epoch': 1} {'type': 'loss', 'content': 0.10528815537691116, 'timestamp': '2025-09-30 22:17:52.229767', 'step': 4737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:52.261682', 'step': 4737, 'epoch': 1} {'type': 'loss', 'content': 0.1594475358724594, 'timestamp': '2025-09-30 22:17:52.273449', 'step': 4738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.305946', 'step': 4738, 'epoch': 1} {'type': 'loss', 'content': 0.1772817075252533, 'timestamp': '2025-09-30 22:17:52.308718', 'step': 4739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:52.340901', 'step': 4739, 'epoch': 1} {'type': 'loss', 'content': 0.11380399018526077, 'timestamp': '2025-09-30 22:17:52.365164', 'step': 4740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:52.397319', 'step': 4740, 'epoch': 1} {'type': 'loss', 'content': 0.20659498870372772, 'timestamp': '2025-09-30 22:17:52.406578', 'step': 4741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.446940', 'step': 4741, 'epoch': 1} {'type': 'loss', 'content': 0.1103982999920845, 'timestamp': '2025-09-30 22:17:52.458927', 'step': 4742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.500759', 'step': 4742, 'epoch': 1} {'type': 'loss', 'content': 0.17085951566696167, 'timestamp': '2025-09-30 22:17:52.510916', 'step': 4743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:52.542726', 'step': 4743, 'epoch': 1} {'type': 'loss', 'content': 0.09675208479166031, 'timestamp': '2025-09-30 22:17:52.572105', 'step': 4744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:52.609302', 'step': 4744, 'epoch': 1} {'type': 'loss', 'content': 0.08445372432470322, 'timestamp': '2025-09-30 22:17:52.616448', 'step': 4745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.666976', 'step': 4745, 'epoch': 1} {'type': 'loss', 'content': 0.2464788407087326, 'timestamp': '2025-09-30 22:17:52.676017', 'step': 4746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.707648', 'step': 4746, 'epoch': 1} {'type': 'loss', 'content': 0.31832361221313477, 'timestamp': '2025-09-30 22:17:52.710468', 'step': 4747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:52.742268', 'step': 4747, 'epoch': 1} {'type': 'loss', 'content': 0.10943956673145294, 'timestamp': '2025-09-30 22:17:52.766704', 'step': 4748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:52.797681', 'step': 4748, 'epoch': 1} {'type': 'loss', 'content': 0.194221630692482, 'timestamp': '2025-09-30 22:17:52.800792', 'step': 4749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:52.832844', 'step': 4749, 'epoch': 1} {'type': 'loss', 'content': 0.1273062378168106, 'timestamp': '2025-09-30 22:17:52.837748', 'step': 4750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:52.877974', 'step': 4750, 'epoch': 1} {'type': 'loss', 'content': 0.21540963649749756, 'timestamp': '2025-09-30 22:17:52.880810', 'step': 4751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:52.911633', 'step': 4751, 'epoch': 1} {'type': 'loss', 'content': 0.1749739944934845, 'timestamp': '2025-09-30 22:17:52.941253', 'step': 4752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:52.975343', 'step': 4752, 'epoch': 1} {'type': 'loss', 'content': 0.23826760053634644, 'timestamp': '2025-09-30 22:17:52.977791', 'step': 4753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.013070', 'step': 4753, 'epoch': 1} {'type': 'loss', 'content': 0.192490354180336, 'timestamp': '2025-09-30 22:17:53.026345', 'step': 4754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.069060', 'step': 4754, 'epoch': 1} {'type': 'loss', 'content': 0.18597614765167236, 'timestamp': '2025-09-30 22:17:53.081099', 'step': 4755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.122954', 'step': 4755, 'epoch': 1} {'type': 'loss', 'content': 0.1832941174507141, 'timestamp': '2025-09-30 22:17:53.157642', 'step': 4756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.188822', 'step': 4756, 'epoch': 1} {'type': 'loss', 'content': 0.1626274287700653, 'timestamp': '2025-09-30 22:17:53.196397', 'step': 4757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:53.228930', 'step': 4757, 'epoch': 1} {'type': 'loss', 'content': 0.11020619422197342, 'timestamp': '2025-09-30 22:17:53.232244', 'step': 4758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:53.264202', 'step': 4758, 'epoch': 1} {'type': 'loss', 'content': 0.18155038356781006, 'timestamp': '2025-09-30 22:17:53.266877', 'step': 4759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.301413', 'step': 4759, 'epoch': 1} {'type': 'loss', 'content': 0.18002428114414215, 'timestamp': '2025-09-30 22:17:53.331836', 'step': 4760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.366391', 'step': 4760, 'epoch': 1} {'type': 'loss', 'content': 0.20725207030773163, 'timestamp': '2025-09-30 22:17:53.374165', 'step': 4761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.406149', 'step': 4761, 'epoch': 1} {'type': 'loss', 'content': 0.06713978946208954, 'timestamp': '2025-09-30 22:17:53.412729', 'step': 4762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.449707', 'step': 4762, 'epoch': 1} {'type': 'loss', 'content': 0.06897454708814621, 'timestamp': '2025-09-30 22:17:53.452620', 'step': 4763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:53.483261', 'step': 4763, 'epoch': 1} {'type': 'loss', 'content': 0.12468364089727402, 'timestamp': '2025-09-30 22:17:53.508797', 'step': 4764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.543803', 'step': 4764, 'epoch': 1} {'type': 'loss', 'content': 0.16182610392570496, 'timestamp': '2025-09-30 22:17:53.546480', 'step': 4765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:53.581097', 'step': 4765, 'epoch': 1} {'type': 'loss', 'content': 0.28816187381744385, 'timestamp': '2025-09-30 22:17:53.586865', 'step': 4766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.618517', 'step': 4766, 'epoch': 1} {'type': 'loss', 'content': 0.23287123441696167, 'timestamp': '2025-09-30 22:17:53.621077', 'step': 4767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:53.657396', 'step': 4767, 'epoch': 1} {'type': 'loss', 'content': 0.243094801902771, 'timestamp': '2025-09-30 22:17:53.684105', 'step': 4768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.715071', 'step': 4768, 'epoch': 1} {'type': 'loss', 'content': 0.13797077536582947, 'timestamp': '2025-09-30 22:17:53.720342', 'step': 4769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:53.750942', 'step': 4769, 'epoch': 1} {'type': 'loss', 'content': 0.21650457382202148, 'timestamp': '2025-09-30 22:17:53.755949', 'step': 4770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.797043', 'step': 4770, 'epoch': 1} {'type': 'loss', 'content': 0.117307148873806, 'timestamp': '2025-09-30 22:17:53.800671', 'step': 4771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.834080', 'step': 4771, 'epoch': 1} {'type': 'loss', 'content': 0.09818796813488007, 'timestamp': '2025-09-30 22:17:53.860311', 'step': 4772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:53.894647', 'step': 4772, 'epoch': 1} {'type': 'loss', 'content': 0.21755866706371307, 'timestamp': '2025-09-30 22:17:53.920879', 'step': 4773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:53.964961', 'step': 4773, 'epoch': 1} {'type': 'loss', 'content': 0.21456806361675262, 'timestamp': '2025-09-30 22:17:53.972940', 'step': 4774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:54.013089', 'step': 4774, 'epoch': 1} {'type': 'loss', 'content': 0.1915920227766037, 'timestamp': '2025-09-30 22:17:54.026653', 'step': 4775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.065581', 'step': 4775, 'epoch': 1} {'type': 'loss', 'content': 0.0861278548836708, 'timestamp': '2025-09-30 22:17:54.090622', 'step': 4776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:54.133128', 'step': 4776, 'epoch': 1} {'type': 'loss', 'content': 0.15125004947185516, 'timestamp': '2025-09-30 22:17:54.136439', 'step': 4777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:54.170265', 'step': 4777, 'epoch': 1} {'type': 'loss', 'content': 0.1169574037194252, 'timestamp': '2025-09-30 22:17:54.177900', 'step': 4778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.219640', 'step': 4778, 'epoch': 1} {'type': 'loss', 'content': 0.15960387885570526, 'timestamp': '2025-09-30 22:17:54.237519', 'step': 4779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:54.284270', 'step': 4779, 'epoch': 1} {'type': 'loss', 'content': 0.17115789651870728, 'timestamp': '2025-09-30 22:17:54.315354', 'step': 4780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:54.349420', 'step': 4780, 'epoch': 1} {'type': 'loss', 'content': 0.26470810174942017, 'timestamp': '2025-09-30 22:17:54.367562', 'step': 4781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:54.407424', 'step': 4781, 'epoch': 1} {'type': 'loss', 'content': 0.22898758947849274, 'timestamp': '2025-09-30 22:17:54.410714', 'step': 4782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:54.443220', 'step': 4782, 'epoch': 1} {'type': 'loss', 'content': 0.10088776051998138, 'timestamp': '2025-09-30 22:17:54.445745', 'step': 4783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.476733', 'step': 4783, 'epoch': 1} {'type': 'loss', 'content': 0.2511308789253235, 'timestamp': '2025-09-30 22:17:54.502051', 'step': 4784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:54.540932', 'step': 4784, 'epoch': 1} {'type': 'loss', 'content': 0.13550309836864471, 'timestamp': '2025-09-30 22:17:54.550985', 'step': 4785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.592023', 'step': 4785, 'epoch': 1} {'type': 'loss', 'content': 0.1800771802663803, 'timestamp': '2025-09-30 22:17:54.595618', 'step': 4786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.632931', 'step': 4786, 'epoch': 1} {'type': 'loss', 'content': 0.1175234243273735, 'timestamp': '2025-09-30 22:17:54.662770', 'step': 4787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.707341', 'step': 4787, 'epoch': 1} {'type': 'loss', 'content': 0.14542578160762787, 'timestamp': '2025-09-30 22:17:54.731791', 'step': 4788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:54.778821', 'step': 4788, 'epoch': 1} {'type': 'loss', 'content': 0.1406458169221878, 'timestamp': '2025-09-30 22:17:54.804275', 'step': 4789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.844033', 'step': 4789, 'epoch': 1} {'type': 'loss', 'content': 0.1646399348974228, 'timestamp': '2025-09-30 22:17:54.850680', 'step': 4790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:54.898844', 'step': 4790, 'epoch': 1} {'type': 'loss', 'content': 0.13998520374298096, 'timestamp': '2025-09-30 22:17:54.907712', 'step': 4791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:54.954801', 'step': 4791, 'epoch': 1} {'type': 'loss', 'content': 0.2716398537158966, 'timestamp': '2025-09-30 22:17:54.981573', 'step': 4792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:55.025636', 'step': 4792, 'epoch': 1} {'type': 'loss', 'content': 0.28300854563713074, 'timestamp': '2025-09-30 22:17:55.035404', 'step': 4793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.076267', 'step': 4793, 'epoch': 1} {'type': 'loss', 'content': 0.15354549884796143, 'timestamp': '2025-09-30 22:17:55.087472', 'step': 4794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:55.129592', 'step': 4794, 'epoch': 1} {'type': 'loss', 'content': 0.18242889642715454, 'timestamp': '2025-09-30 22:17:55.134201', 'step': 4795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.173004', 'step': 4795, 'epoch': 1} {'type': 'loss', 'content': 0.11406969279050827, 'timestamp': '2025-09-30 22:17:55.200005', 'step': 4796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:55.238784', 'step': 4796, 'epoch': 1} {'type': 'loss', 'content': 0.1742333173751831, 'timestamp': '2025-09-30 22:17:55.243284', 'step': 4797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.282712', 'step': 4797, 'epoch': 1} {'type': 'loss', 'content': 0.1471826732158661, 'timestamp': '2025-09-30 22:17:55.287988', 'step': 4798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:55.320047', 'step': 4798, 'epoch': 1} {'type': 'loss', 'content': 0.15839631855487823, 'timestamp': '2025-09-30 22:17:55.323639', 'step': 4799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:55.355986', 'step': 4799, 'epoch': 1} {'type': 'loss', 'content': 0.15370574593544006, 'timestamp': '2025-09-30 22:17:55.387644', 'step': 4800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:55.425896', 'step': 4800, 'epoch': 1} {'type': 'loss', 'content': 0.14149852097034454, 'timestamp': '2025-09-30 22:17:55.428806', 'step': 4801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.461352', 'step': 4801, 'epoch': 1} {'type': 'loss', 'content': 0.1858416348695755, 'timestamp': '2025-09-30 22:17:55.466902', 'step': 4802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.500009', 'step': 4802, 'epoch': 1} {'type': 'loss', 'content': 0.1591450721025467, 'timestamp': '2025-09-30 22:17:55.506660', 'step': 4803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:55.540025', 'step': 4803, 'epoch': 1} {'type': 'loss', 'content': 0.18464207649230957, 'timestamp': '2025-09-30 22:17:55.565684', 'step': 4804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.596335', 'step': 4804, 'epoch': 1} {'type': 'loss', 'content': 0.30171534419059753, 'timestamp': '2025-09-30 22:17:55.600040', 'step': 4805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:55.651078', 'step': 4805, 'epoch': 1} {'type': 'loss', 'content': 0.17601382732391357, 'timestamp': '2025-09-30 22:17:55.653173', 'step': 4806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:55.685623', 'step': 4806, 'epoch': 1} {'type': 'loss', 'content': 0.08587303012609482, 'timestamp': '2025-09-30 22:17:55.690854', 'step': 4807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:55.727712', 'step': 4807, 'epoch': 1} {'type': 'loss', 'content': 0.16455045342445374, 'timestamp': '2025-09-30 22:17:55.755842', 'step': 4808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:55.788850', 'step': 4808, 'epoch': 1} {'type': 'loss', 'content': 0.17816665768623352, 'timestamp': '2025-09-30 22:17:55.793532', 'step': 4809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:17:55.828361', 'step': 4809, 'epoch': 1} {'type': 'loss', 'content': 0.19476458430290222, 'timestamp': '2025-09-30 22:17:55.834915', 'step': 4810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:55.877715', 'step': 4810, 'epoch': 1} {'type': 'loss', 'content': 0.1234324499964714, 'timestamp': '2025-09-30 22:17:55.880512', 'step': 4811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.914914', 'step': 4811, 'epoch': 1} {'type': 'loss', 'content': 0.19769831001758575, 'timestamp': '2025-09-30 22:17:55.939353', 'step': 4812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:55.975036', 'step': 4812, 'epoch': 1} {'type': 'loss', 'content': 0.24900126457214355, 'timestamp': '2025-09-30 22:17:55.979479', 'step': 4813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.015912', 'step': 4813, 'epoch': 1} {'type': 'loss', 'content': 0.24596716463565826, 'timestamp': '2025-09-30 22:17:56.021126', 'step': 4814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.054790', 'step': 4814, 'epoch': 1} {'type': 'loss', 'content': 0.0900687649846077, 'timestamp': '2025-09-30 22:17:56.059498', 'step': 4815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.097357', 'step': 4815, 'epoch': 1} {'type': 'loss', 'content': 0.17553125321865082, 'timestamp': '2025-09-30 22:17:56.128266', 'step': 4816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.160683', 'step': 4816, 'epoch': 1} {'type': 'loss', 'content': 0.16921237111091614, 'timestamp': '2025-09-30 22:17:56.163681', 'step': 4817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.195170', 'step': 4817, 'epoch': 1} {'type': 'loss', 'content': 0.1210145354270935, 'timestamp': '2025-09-30 22:17:56.199106', 'step': 4818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.233050', 'step': 4818, 'epoch': 1} {'type': 'loss', 'content': 0.11111599951982498, 'timestamp': '2025-09-30 22:17:56.238195', 'step': 4819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:56.272823', 'step': 4819, 'epoch': 1} {'type': 'loss', 'content': 0.12231653928756714, 'timestamp': '2025-09-30 22:17:56.304823', 'step': 4820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.336162', 'step': 4820, 'epoch': 1} {'type': 'loss', 'content': 0.19308333098888397, 'timestamp': '2025-09-30 22:17:56.341993', 'step': 4821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.374326', 'step': 4821, 'epoch': 1} {'type': 'loss', 'content': 0.18548113107681274, 'timestamp': '2025-09-30 22:17:56.379904', 'step': 4822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:56.411962', 'step': 4822, 'epoch': 1} {'type': 'loss', 'content': 0.06462083756923676, 'timestamp': '2025-09-30 22:17:56.415720', 'step': 4823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:56.447277', 'step': 4823, 'epoch': 1} {'type': 'loss', 'content': 0.10151340067386627, 'timestamp': '2025-09-30 22:17:56.472610', 'step': 4824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.506634', 'step': 4824, 'epoch': 1} {'type': 'loss', 'content': 0.10261169821023941, 'timestamp': '2025-09-30 22:17:56.510438', 'step': 4825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.542409', 'step': 4825, 'epoch': 1} {'type': 'loss', 'content': 0.09619457274675369, 'timestamp': '2025-09-30 22:17:56.546678', 'step': 4826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.578492', 'step': 4826, 'epoch': 1} {'type': 'loss', 'content': 0.07877319306135178, 'timestamp': '2025-09-30 22:17:56.581930', 'step': 4827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.613345', 'step': 4827, 'epoch': 1} {'type': 'loss', 'content': 0.11157609522342682, 'timestamp': '2025-09-30 22:17:56.638908', 'step': 4828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:56.684477', 'step': 4828, 'epoch': 1} {'type': 'loss', 'content': 0.15304212272167206, 'timestamp': '2025-09-30 22:17:56.687363', 'step': 4829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.717674', 'step': 4829, 'epoch': 1} {'type': 'loss', 'content': 0.14569860696792603, 'timestamp': '2025-09-30 22:17:56.721836', 'step': 4830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:56.752158', 'step': 4830, 'epoch': 1} {'type': 'loss', 'content': 0.13386310636997223, 'timestamp': '2025-09-30 22:17:56.757236', 'step': 4831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:56.787985', 'step': 4831, 'epoch': 1} {'type': 'loss', 'content': 0.20673467218875885, 'timestamp': '2025-09-30 22:17:56.816492', 'step': 4832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:56.849900', 'step': 4832, 'epoch': 1} {'type': 'loss', 'content': 0.0991704910993576, 'timestamp': '2025-09-30 22:17:56.853690', 'step': 4833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:56.886875', 'step': 4833, 'epoch': 1} {'type': 'loss', 'content': 0.154127299785614, 'timestamp': '2025-09-30 22:17:56.892805', 'step': 4834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:56.924954', 'step': 4834, 'epoch': 1} {'type': 'loss', 'content': 0.10653731971979141, 'timestamp': '2025-09-30 22:17:56.927472', 'step': 4835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:56.959530', 'step': 4835, 'epoch': 1} {'type': 'loss', 'content': 0.1330236792564392, 'timestamp': '2025-09-30 22:17:56.983847', 'step': 4836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:57.015375', 'step': 4836, 'epoch': 1} {'type': 'loss', 'content': 0.06759156286716461, 'timestamp': '2025-09-30 22:17:57.019656', 'step': 4837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:57.051644', 'step': 4837, 'epoch': 1} {'type': 'loss', 'content': 0.1800488978624344, 'timestamp': '2025-09-30 22:17:57.056843', 'step': 4838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:57.091361', 'step': 4838, 'epoch': 1} {'type': 'loss', 'content': 0.2399042844772339, 'timestamp': '2025-09-30 22:17:57.094127', 'step': 4839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:57.126618', 'step': 4839, 'epoch': 1} {'type': 'loss', 'content': 0.12796203792095184, 'timestamp': '2025-09-30 22:17:57.152623', 'step': 4840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:57.183018', 'step': 4840, 'epoch': 1} {'type': 'loss', 'content': 0.1345786154270172, 'timestamp': '2025-09-30 22:17:57.185438', 'step': 4841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:57.216381', 'step': 4841, 'epoch': 1} {'type': 'loss', 'content': 0.14868322014808655, 'timestamp': '2025-09-30 22:17:57.219993', 'step': 4842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:57.252544', 'step': 4842, 'epoch': 1} {'type': 'loss', 'content': 0.19690054655075073, 'timestamp': '2025-09-30 22:17:57.258045', 'step': 4843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:57.290515', 'step': 4843, 'epoch': 1} {'type': 'loss', 'content': 0.11960553377866745, 'timestamp': '2025-09-30 22:17:57.314712', 'step': 4844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:57.347555', 'step': 4844, 'epoch': 1} {'type': 'loss', 'content': 0.1825098693370819, 'timestamp': '2025-09-30 22:17:57.352334', 'step': 4845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:57.384011', 'step': 4845, 'epoch': 1} {'type': 'loss', 'content': 0.07323572039604187, 'timestamp': '2025-09-30 22:17:57.389718', 'step': 4846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:57.425030', 'step': 4846, 'epoch': 1} {'type': 'loss', 'content': 0.1573401540517807, 'timestamp': '2025-09-30 22:17:57.430116', 'step': 4847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:57.464332', 'step': 4847, 'epoch': 1} {'type': 'loss', 'content': 0.16120575368404388, 'timestamp': '2025-09-30 22:17:57.491799', 'step': 4848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:57.525828', 'step': 4848, 'epoch': 1} {'type': 'loss', 'content': 0.2914454936981201, 'timestamp': '2025-09-30 22:17:57.531671', 'step': 4849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:57.565536', 'step': 4849, 'epoch': 1} {'type': 'loss', 'content': 0.1361238658428192, 'timestamp': '2025-09-30 22:17:57.570498', 'step': 4850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:57.602591', 'step': 4850, 'epoch': 1} {'type': 'loss', 'content': 0.14019551873207092, 'timestamp': '2025-09-30 22:17:57.605292', 'step': 4851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:57.636329', 'step': 4851, 'epoch': 1} {'type': 'loss', 'content': 0.1914013773202896, 'timestamp': '2025-09-30 22:17:57.663484', 'step': 4852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:57.697660', 'step': 4852, 'epoch': 1} {'type': 'loss', 'content': 0.13391926884651184, 'timestamp': '2025-09-30 22:17:57.703053', 'step': 4853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:57.735116', 'step': 4853, 'epoch': 1} {'type': 'loss', 'content': 0.15171143412590027, 'timestamp': '2025-09-30 22:17:57.739979', 'step': 4854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:57.774822', 'step': 4854, 'epoch': 1} {'type': 'loss', 'content': 0.1752769500017166, 'timestamp': '2025-09-30 22:17:57.782910', 'step': 4855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:57.816110', 'step': 4855, 'epoch': 1} {'type': 'loss', 'content': 0.15479663014411926, 'timestamp': '2025-09-30 22:17:57.840731', 'step': 4856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:57.872775', 'step': 4856, 'epoch': 1} {'type': 'loss', 'content': 0.06429316848516464, 'timestamp': '2025-09-30 22:17:57.879586', 'step': 4857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:57.922596', 'step': 4857, 'epoch': 1} {'type': 'loss', 'content': 0.3040847182273865, 'timestamp': '2025-09-30 22:17:57.930631', 'step': 4858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:57.966397', 'step': 4858, 'epoch': 1} {'type': 'loss', 'content': 0.1957794576883316, 'timestamp': '2025-09-30 22:17:57.980361', 'step': 4859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:58.022263', 'step': 4859, 'epoch': 1} {'type': 'loss', 'content': 0.10282859206199646, 'timestamp': '2025-09-30 22:17:58.046359', 'step': 4860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.077351', 'step': 4860, 'epoch': 1} {'type': 'loss', 'content': 0.20133092999458313, 'timestamp': '2025-09-30 22:17:58.082552', 'step': 4861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:58.115706', 'step': 4861, 'epoch': 1} {'type': 'loss', 'content': 0.13190676271915436, 'timestamp': '2025-09-30 22:17:58.121724', 'step': 4862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:58.155069', 'step': 4862, 'epoch': 1} {'type': 'loss', 'content': 0.28581610321998596, 'timestamp': '2025-09-30 22:17:58.158378', 'step': 4863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:58.191090', 'step': 4863, 'epoch': 1} {'type': 'loss', 'content': 0.09931501001119614, 'timestamp': '2025-09-30 22:17:58.217110', 'step': 4864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:58.248895', 'step': 4864, 'epoch': 1} {'type': 'loss', 'content': 0.13717682659626007, 'timestamp': '2025-09-30 22:17:58.254753', 'step': 4865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:17:58.287960', 'step': 4865, 'epoch': 1} {'type': 'loss', 'content': 0.20346051454544067, 'timestamp': '2025-09-30 22:17:58.292216', 'step': 4866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.323393', 'step': 4866, 'epoch': 1} {'type': 'loss', 'content': 0.19145186245441437, 'timestamp': '2025-09-30 22:17:58.325860', 'step': 4867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:58.356193', 'step': 4867, 'epoch': 1} {'type': 'loss', 'content': 0.20642702281475067, 'timestamp': '2025-09-30 22:17:58.382035', 'step': 4868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:58.414675', 'step': 4868, 'epoch': 1} {'type': 'loss', 'content': 0.3402271866798401, 'timestamp': '2025-09-30 22:17:58.417063', 'step': 4869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:58.447659', 'step': 4869, 'epoch': 1} {'type': 'loss', 'content': 0.20540925860404968, 'timestamp': '2025-09-30 22:17:58.451758', 'step': 4870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:58.485757', 'step': 4870, 'epoch': 1} {'type': 'loss', 'content': 0.15101076662540436, 'timestamp': '2025-09-30 22:17:58.489645', 'step': 4871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.522474', 'step': 4871, 'epoch': 1} {'type': 'loss', 'content': 0.10152280330657959, 'timestamp': '2025-09-30 22:17:58.549824', 'step': 4872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.579907', 'step': 4872, 'epoch': 1} {'type': 'loss', 'content': 0.12156422436237335, 'timestamp': '2025-09-30 22:17:58.584104', 'step': 4873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.614198', 'step': 4873, 'epoch': 1} {'type': 'loss', 'content': 0.17767541110515594, 'timestamp': '2025-09-30 22:17:58.616835', 'step': 4874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.660102', 'step': 4874, 'epoch': 1} {'type': 'loss', 'content': 0.19097742438316345, 'timestamp': '2025-09-30 22:17:58.663018', 'step': 4875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:58.693940', 'step': 4875, 'epoch': 1} {'type': 'loss', 'content': 0.14717921614646912, 'timestamp': '2025-09-30 22:17:58.720802', 'step': 4876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:58.751198', 'step': 4876, 'epoch': 1} {'type': 'loss', 'content': 0.18514159321784973, 'timestamp': '2025-09-30 22:17:58.754525', 'step': 4877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:58.805855', 'step': 4877, 'epoch': 1} {'type': 'loss', 'content': 0.09277257323265076, 'timestamp': '2025-09-30 22:17:58.812517', 'step': 4878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.842518', 'step': 4878, 'epoch': 1} {'type': 'loss', 'content': 0.25429004430770874, 'timestamp': '2025-09-30 22:17:58.849091', 'step': 4879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.883292', 'step': 4879, 'epoch': 1} {'type': 'loss', 'content': 0.1350322812795639, 'timestamp': '2025-09-30 22:17:58.908519', 'step': 4880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.939363', 'step': 4880, 'epoch': 1} {'type': 'loss', 'content': 0.2088952213525772, 'timestamp': '2025-09-30 22:17:58.946516', 'step': 4881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:58.983061', 'step': 4881, 'epoch': 1} {'type': 'loss', 'content': 0.17897801101207733, 'timestamp': '2025-09-30 22:17:58.989005', 'step': 4882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:59.019330', 'step': 4882, 'epoch': 1} {'type': 'loss', 'content': 0.10523354262113571, 'timestamp': '2025-09-30 22:17:59.021867', 'step': 4883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:59.059171', 'step': 4883, 'epoch': 1} {'type': 'loss', 'content': 0.16728100180625916, 'timestamp': '2025-09-30 22:17:59.085019', 'step': 4884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:59.126636', 'step': 4884, 'epoch': 1} {'type': 'loss', 'content': 0.1459321677684784, 'timestamp': '2025-09-30 22:17:59.133169', 'step': 4885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.167265', 'step': 4885, 'epoch': 1} {'type': 'loss', 'content': 0.1949234902858734, 'timestamp': '2025-09-30 22:17:59.169823', 'step': 4886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.205732', 'step': 4886, 'epoch': 1} {'type': 'loss', 'content': 0.18024368584156036, 'timestamp': '2025-09-30 22:17:59.212274', 'step': 4887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.248932', 'step': 4887, 'epoch': 1} {'type': 'loss', 'content': 0.13966137170791626, 'timestamp': '2025-09-30 22:17:59.276599', 'step': 4888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.310541', 'step': 4888, 'epoch': 1} {'type': 'loss', 'content': 0.18390701711177826, 'timestamp': '2025-09-30 22:17:59.323339', 'step': 4889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:59.359750', 'step': 4889, 'epoch': 1} {'type': 'loss', 'content': 0.20487788319587708, 'timestamp': '2025-09-30 22:17:59.366783', 'step': 4890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:17:59.401832', 'step': 4890, 'epoch': 1} {'type': 'loss', 'content': 0.16538609564304352, 'timestamp': '2025-09-30 22:17:59.404676', 'step': 4891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.438890', 'step': 4891, 'epoch': 1} {'type': 'loss', 'content': 0.13926561176776886, 'timestamp': '2025-09-30 22:17:59.463369', 'step': 4892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:59.493899', 'step': 4892, 'epoch': 1} {'type': 'loss', 'content': 0.22160013020038605, 'timestamp': '2025-09-30 22:17:59.496933', 'step': 4893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:59.531060', 'step': 4893, 'epoch': 1} {'type': 'loss', 'content': 0.18595315515995026, 'timestamp': '2025-09-30 22:17:59.533836', 'step': 4894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:59.568917', 'step': 4894, 'epoch': 1} {'type': 'loss', 'content': 0.27516499161720276, 'timestamp': '2025-09-30 22:17:59.571514', 'step': 4895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.607488', 'step': 4895, 'epoch': 1} {'type': 'loss', 'content': 0.1131136417388916, 'timestamp': '2025-09-30 22:17:59.636458', 'step': 4896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:17:59.670590', 'step': 4896, 'epoch': 1} {'type': 'loss', 'content': 0.1754581183195114, 'timestamp': '2025-09-30 22:17:59.676893', 'step': 4897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:17:59.711827', 'step': 4897, 'epoch': 1} {'type': 'loss', 'content': 0.0889461487531662, 'timestamp': '2025-09-30 22:17:59.714749', 'step': 4898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:59.748916', 'step': 4898, 'epoch': 1} {'type': 'loss', 'content': 0.17987483739852905, 'timestamp': '2025-09-30 22:17:59.755075', 'step': 4899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:59.795333', 'step': 4899, 'epoch': 1} {'type': 'loss', 'content': 0.1953846514225006, 'timestamp': '2025-09-30 22:17:59.819944', 'step': 4900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:59.857556', 'step': 4900, 'epoch': 1} {'type': 'loss', 'content': 0.12304165959358215, 'timestamp': '2025-09-30 22:17:59.869728', 'step': 4901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:17:59.904781', 'step': 4901, 'epoch': 1} {'type': 'loss', 'content': 0.16537411510944366, 'timestamp': '2025-09-30 22:17:59.915505', 'step': 4902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:17:59.955607', 'step': 4902, 'epoch': 1} {'type': 'loss', 'content': 0.1849631518125534, 'timestamp': '2025-09-30 22:17:59.965703', 'step': 4903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.005289', 'step': 4903, 'epoch': 1} {'type': 'loss', 'content': 0.12784074246883392, 'timestamp': '2025-09-30 22:18:00.029997', 'step': 4904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.068935', 'step': 4904, 'epoch': 1} {'type': 'loss', 'content': 0.12826167047023773, 'timestamp': '2025-09-30 22:18:00.076639', 'step': 4905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:00.127421', 'step': 4905, 'epoch': 1} {'type': 'loss', 'content': 0.13728594779968262, 'timestamp': '2025-09-30 22:18:00.132404', 'step': 4906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.164034', 'step': 4906, 'epoch': 1} {'type': 'loss', 'content': 0.13784699141979218, 'timestamp': '2025-09-30 22:18:00.176412', 'step': 4907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:00.208864', 'step': 4907, 'epoch': 1} {'type': 'loss', 'content': 0.17193306982517242, 'timestamp': '2025-09-30 22:18:00.249478', 'step': 4908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:00.280483', 'step': 4908, 'epoch': 1} {'type': 'loss', 'content': 0.13160015642642975, 'timestamp': '2025-09-30 22:18:00.284559', 'step': 4909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:00.322365', 'step': 4909, 'epoch': 1} {'type': 'loss', 'content': 0.10339796543121338, 'timestamp': '2025-09-30 22:18:00.326332', 'step': 4910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.357529', 'step': 4910, 'epoch': 1} {'type': 'loss', 'content': 0.2612883448600769, 'timestamp': '2025-09-30 22:18:00.365854', 'step': 4911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.406132', 'step': 4911, 'epoch': 1} {'type': 'loss', 'content': 0.18274448812007904, 'timestamp': '2025-09-30 22:18:00.430287', 'step': 4912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:00.461143', 'step': 4912, 'epoch': 1} {'type': 'loss', 'content': 0.13410048186779022, 'timestamp': '2025-09-30 22:18:00.466009', 'step': 4913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.497761', 'step': 4913, 'epoch': 1} {'type': 'loss', 'content': 0.15512505173683167, 'timestamp': '2025-09-30 22:18:00.500847', 'step': 4914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:00.544602', 'step': 4914, 'epoch': 1} {'type': 'loss', 'content': 0.18500623106956482, 'timestamp': '2025-09-30 22:18:00.555600', 'step': 4915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.588253', 'step': 4915, 'epoch': 1} {'type': 'loss', 'content': 0.13842596113681793, 'timestamp': '2025-09-30 22:18:00.612626', 'step': 4916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.650670', 'step': 4916, 'epoch': 1} {'type': 'loss', 'content': 0.1654706448316574, 'timestamp': '2025-09-30 22:18:00.653567', 'step': 4917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.684553', 'step': 4917, 'epoch': 1} {'type': 'loss', 'content': 0.11272643506526947, 'timestamp': '2025-09-30 22:18:00.692114', 'step': 4918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:00.731815', 'step': 4918, 'epoch': 1} {'type': 'loss', 'content': 0.13869987428188324, 'timestamp': '2025-09-30 22:18:00.740713', 'step': 4919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.774381', 'step': 4919, 'epoch': 1} {'type': 'loss', 'content': 0.11883572489023209, 'timestamp': '2025-09-30 22:18:00.804151', 'step': 4920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:00.843913', 'step': 4920, 'epoch': 1} {'type': 'loss', 'content': 0.15430954098701477, 'timestamp': '2025-09-30 22:18:00.847158', 'step': 4921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:00.885875', 'step': 4921, 'epoch': 1} {'type': 'loss', 'content': 0.09900316596031189, 'timestamp': '2025-09-30 22:18:00.894505', 'step': 4922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:00.933536', 'step': 4922, 'epoch': 1} {'type': 'loss', 'content': 0.17374514043331146, 'timestamp': '2025-09-30 22:18:00.936361', 'step': 4923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:00.968157', 'step': 4923, 'epoch': 1} {'type': 'loss', 'content': 0.11216214299201965, 'timestamp': '2025-09-30 22:18:01.006744', 'step': 4924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.046465', 'step': 4924, 'epoch': 1} {'type': 'loss', 'content': 0.13631507754325867, 'timestamp': '2025-09-30 22:18:01.055926', 'step': 4925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.103355', 'step': 4925, 'epoch': 1} {'type': 'loss', 'content': 0.16172140836715698, 'timestamp': '2025-09-30 22:18:01.111027', 'step': 4926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:01.149707', 'step': 4926, 'epoch': 1} {'type': 'loss', 'content': 0.27753978967666626, 'timestamp': '2025-09-30 22:18:01.158429', 'step': 4927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.196685', 'step': 4927, 'epoch': 1} {'type': 'loss', 'content': 0.16605493426322937, 'timestamp': '2025-09-30 22:18:01.221216', 'step': 4928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.267631', 'step': 4928, 'epoch': 1} {'type': 'loss', 'content': 0.18020164966583252, 'timestamp': '2025-09-30 22:18:01.274755', 'step': 4929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:01.306827', 'step': 4929, 'epoch': 1} {'type': 'loss', 'content': 0.13013562560081482, 'timestamp': '2025-09-30 22:18:01.312030', 'step': 4930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.350597', 'step': 4930, 'epoch': 1} {'type': 'loss', 'content': 0.13141928613185883, 'timestamp': '2025-09-30 22:18:01.359355', 'step': 4931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.390655', 'step': 4931, 'epoch': 1} {'type': 'loss', 'content': 0.126040518283844, 'timestamp': '2025-09-30 22:18:01.418657', 'step': 4932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.451956', 'step': 4932, 'epoch': 1} {'type': 'loss', 'content': 0.08828151971101761, 'timestamp': '2025-09-30 22:18:01.458090', 'step': 4933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.490876', 'step': 4933, 'epoch': 1} {'type': 'loss', 'content': 0.16297373175621033, 'timestamp': '2025-09-30 22:18:01.493444', 'step': 4934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:01.533592', 'step': 4934, 'epoch': 1} {'type': 'loss', 'content': 0.14615698158740997, 'timestamp': '2025-09-30 22:18:01.539899', 'step': 4935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:01.575048', 'step': 4935, 'epoch': 1} {'type': 'loss', 'content': 0.17392705380916595, 'timestamp': '2025-09-30 22:18:01.604991', 'step': 4936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:01.639721', 'step': 4936, 'epoch': 1} {'type': 'loss', 'content': 0.1626683473587036, 'timestamp': '2025-09-30 22:18:01.643529', 'step': 4937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.675283', 'step': 4937, 'epoch': 1} {'type': 'loss', 'content': 0.15367308259010315, 'timestamp': '2025-09-30 22:18:01.681415', 'step': 4938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.716422', 'step': 4938, 'epoch': 1} {'type': 'loss', 'content': 0.15682965517044067, 'timestamp': '2025-09-30 22:18:01.725706', 'step': 4939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.758805', 'step': 4939, 'epoch': 1} {'type': 'loss', 'content': 0.0962148904800415, 'timestamp': '2025-09-30 22:18:01.783429', 'step': 4940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.814965', 'step': 4940, 'epoch': 1} {'type': 'loss', 'content': 0.1235039234161377, 'timestamp': '2025-09-30 22:18:01.823893', 'step': 4941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:01.861554', 'step': 4941, 'epoch': 1} {'type': 'loss', 'content': 0.22405390441417694, 'timestamp': '2025-09-30 22:18:01.864562', 'step': 4942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:01.902569', 'step': 4942, 'epoch': 1} {'type': 'loss', 'content': 0.09626481682062149, 'timestamp': '2025-09-30 22:18:01.905121', 'step': 4943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:01.936964', 'step': 4943, 'epoch': 1} {'type': 'loss', 'content': 0.2465812712907791, 'timestamp': '2025-09-30 22:18:01.971063', 'step': 4944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:02.002639', 'step': 4944, 'epoch': 1} {'type': 'loss', 'content': 0.15190458297729492, 'timestamp': '2025-09-30 22:18:02.012937', 'step': 4945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:02.052770', 'step': 4945, 'epoch': 1} {'type': 'loss', 'content': 0.32152822613716125, 'timestamp': '2025-09-30 22:18:02.066292', 'step': 4946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:02.105294', 'step': 4946, 'epoch': 1} {'type': 'loss', 'content': 0.1974572390317917, 'timestamp': '2025-09-30 22:18:02.108364', 'step': 4947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:02.140997', 'step': 4947, 'epoch': 1} {'type': 'loss', 'content': 0.17421291768550873, 'timestamp': '2025-09-30 22:18:02.169718', 'step': 4948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:02.209170', 'step': 4948, 'epoch': 1} {'type': 'loss', 'content': 0.1836119145154953, 'timestamp': '2025-09-30 22:18:02.213421', 'step': 4949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:02.246298', 'step': 4949, 'epoch': 1} {'type': 'loss', 'content': 0.08468038588762283, 'timestamp': '2025-09-30 22:18:02.256915', 'step': 4950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:02.288647', 'step': 4950, 'epoch': 1} {'type': 'loss', 'content': 0.12054231017827988, 'timestamp': '2025-09-30 22:18:02.295897', 'step': 4951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:02.338400', 'step': 4951, 'epoch': 1} {'type': 'loss', 'content': 0.1519259661436081, 'timestamp': '2025-09-30 22:18:02.362423', 'step': 4952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:02.400357', 'step': 4952, 'epoch': 1} {'type': 'loss', 'content': 0.18039801716804504, 'timestamp': '2025-09-30 22:18:02.403080', 'step': 4953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:02.434629', 'step': 4953, 'epoch': 1} {'type': 'loss', 'content': 0.12340869754552841, 'timestamp': '2025-09-30 22:18:02.445164', 'step': 4954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:02.481829', 'step': 4954, 'epoch': 1} {'type': 'loss', 'content': 0.1518511027097702, 'timestamp': '2025-09-30 22:18:02.484544', 'step': 4955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:02.520779', 'step': 4955, 'epoch': 1} {'type': 'loss', 'content': 0.24539007246494293, 'timestamp': '2025-09-30 22:18:02.545456', 'step': 4956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:02.577290', 'step': 4956, 'epoch': 1} {'type': 'loss', 'content': 0.1214989572763443, 'timestamp': '2025-09-30 22:18:02.584611', 'step': 4957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:02.623899', 'step': 4957, 'epoch': 1} {'type': 'loss', 'content': 0.15077492594718933, 'timestamp': '2025-09-30 22:18:02.626935', 'step': 4958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:02.659233', 'step': 4958, 'epoch': 1} {'type': 'loss', 'content': 0.2062234878540039, 'timestamp': '2025-09-30 22:18:02.667941', 'step': 4959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:02.705894', 'step': 4959, 'epoch': 1} {'type': 'loss', 'content': 0.1510283350944519, 'timestamp': '2025-09-30 22:18:02.737144', 'step': 4960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:02.778826', 'step': 4960, 'epoch': 1} {'type': 'loss', 'content': 0.13413871824741364, 'timestamp': '2025-09-30 22:18:02.791502', 'step': 4961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:02.829848', 'step': 4961, 'epoch': 1} {'type': 'loss', 'content': 0.17624187469482422, 'timestamp': '2025-09-30 22:18:02.836687', 'step': 4962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:02.872089', 'step': 4962, 'epoch': 1} {'type': 'loss', 'content': 0.13453063368797302, 'timestamp': '2025-09-30 22:18:02.881112', 'step': 4963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:02.912141', 'step': 4963, 'epoch': 1} {'type': 'loss', 'content': 0.12838470935821533, 'timestamp': '2025-09-30 22:18:02.943545', 'step': 4964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:02.977792', 'step': 4964, 'epoch': 1} {'type': 'loss', 'content': 0.10486555099487305, 'timestamp': '2025-09-30 22:18:02.987584', 'step': 4965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:03.027895', 'step': 4965, 'epoch': 1} {'type': 'loss', 'content': 0.20428723096847534, 'timestamp': '2025-09-30 22:18:03.031684', 'step': 4966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:03.063915', 'step': 4966, 'epoch': 1} {'type': 'loss', 'content': 0.1624300479888916, 'timestamp': '2025-09-30 22:18:03.075129', 'step': 4967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:03.107914', 'step': 4967, 'epoch': 1} {'type': 'loss', 'content': 0.07134156674146652, 'timestamp': '2025-09-30 22:18:03.132720', 'step': 4968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:03.163623', 'step': 4968, 'epoch': 1} {'type': 'loss', 'content': 0.2582418918609619, 'timestamp': '2025-09-30 22:18:03.166937', 'step': 4969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:03.201478', 'step': 4969, 'epoch': 1} {'type': 'loss', 'content': 0.1640653908252716, 'timestamp': '2025-09-30 22:18:03.208889', 'step': 4970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:03.244716', 'step': 4970, 'epoch': 1} {'type': 'loss', 'content': 0.16141867637634277, 'timestamp': '2025-09-30 22:18:03.254993', 'step': 4971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:03.294015', 'step': 4971, 'epoch': 1} {'type': 'loss', 'content': 0.10104023665189743, 'timestamp': '2025-09-30 22:18:03.322335', 'step': 4972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:03.355051', 'step': 4972, 'epoch': 1} {'type': 'loss', 'content': 0.13839411735534668, 'timestamp': '2025-09-30 22:18:03.357932', 'step': 4973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:03.395366', 'step': 4973, 'epoch': 1} {'type': 'loss', 'content': 0.18552140891551971, 'timestamp': '2025-09-30 22:18:03.398267', 'step': 4974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:03.433691', 'step': 4974, 'epoch': 1} {'type': 'loss', 'content': 0.09856919944286346, 'timestamp': '2025-09-30 22:18:03.441302', 'step': 4975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:03.476490', 'step': 4975, 'epoch': 1} {'type': 'loss', 'content': 0.09479455649852753, 'timestamp': '2025-09-30 22:18:03.502694', 'step': 4976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:03.540219', 'step': 4976, 'epoch': 1} {'type': 'loss', 'content': 0.1562652885913849, 'timestamp': '2025-09-30 22:18:03.542536', 'step': 4977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:03.578154', 'step': 4977, 'epoch': 1} {'type': 'loss', 'content': 0.12049688398838043, 'timestamp': '2025-09-30 22:18:03.581823', 'step': 4978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:03.613664', 'step': 4978, 'epoch': 1} {'type': 'loss', 'content': 0.2873794138431549, 'timestamp': '2025-09-30 22:18:03.616885', 'step': 4979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:03.654976', 'step': 4979, 'epoch': 1} {'type': 'loss', 'content': 0.11949574202299118, 'timestamp': '2025-09-30 22:18:03.684151', 'step': 4980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:03.721088', 'step': 4980, 'epoch': 1} {'type': 'loss', 'content': 0.13819703459739685, 'timestamp': '2025-09-30 22:18:03.723851', 'step': 4981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:03.760394', 'step': 4981, 'epoch': 1} {'type': 'loss', 'content': 0.1087726503610611, 'timestamp': '2025-09-30 22:18:03.763659', 'step': 4982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:03.798471', 'step': 4982, 'epoch': 1} {'type': 'loss', 'content': 0.0931544080376625, 'timestamp': '2025-09-30 22:18:03.803919', 'step': 4983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:03.843851', 'step': 4983, 'epoch': 1} {'type': 'loss', 'content': 0.19202673435211182, 'timestamp': '2025-09-30 22:18:03.873892', 'step': 4984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:03.911593', 'step': 4984, 'epoch': 1} {'type': 'loss', 'content': 0.21693290770053864, 'timestamp': '2025-09-30 22:18:03.915240', 'step': 4985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:03.954362', 'step': 4985, 'epoch': 1} {'type': 'loss', 'content': 0.16978439688682556, 'timestamp': '2025-09-30 22:18:03.962903', 'step': 4986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:04.003655', 'step': 4986, 'epoch': 1} {'type': 'loss', 'content': 0.1505233496427536, 'timestamp': '2025-09-30 22:18:04.015574', 'step': 4987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:04.047101', 'step': 4987, 'epoch': 1} {'type': 'loss', 'content': 0.10759701579809189, 'timestamp': '2025-09-30 22:18:04.073352', 'step': 4988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:04.106040', 'step': 4988, 'epoch': 1} {'type': 'loss', 'content': 0.0932888314127922, 'timestamp': '2025-09-30 22:18:04.109927', 'step': 4989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:04.154107', 'step': 4989, 'epoch': 1} {'type': 'loss', 'content': 0.11503712832927704, 'timestamp': '2025-09-30 22:18:04.158234', 'step': 4990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:04.199054', 'step': 4990, 'epoch': 1} {'type': 'loss', 'content': 0.22776532173156738, 'timestamp': '2025-09-30 22:18:04.212790', 'step': 4991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:04.244978', 'step': 4991, 'epoch': 1} {'type': 'loss', 'content': 0.23464979231357574, 'timestamp': '2025-09-30 22:18:04.269745', 'step': 4992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:04.309158', 'step': 4992, 'epoch': 1} {'type': 'loss', 'content': 0.12360896915197372, 'timestamp': '2025-09-30 22:18:04.313464', 'step': 4993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:04.345354', 'step': 4993, 'epoch': 1} {'type': 'loss', 'content': 0.2461940050125122, 'timestamp': '2025-09-30 22:18:04.356565', 'step': 4994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:04.388891', 'step': 4994, 'epoch': 1} {'type': 'loss', 'content': 0.16007129848003387, 'timestamp': '2025-09-30 22:18:04.399674', 'step': 4995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:04.442238', 'step': 4995, 'epoch': 1} {'type': 'loss', 'content': 0.1414009928703308, 'timestamp': '2025-09-30 22:18:04.466790', 'step': 4996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:04.505934', 'step': 4996, 'epoch': 1} {'type': 'loss', 'content': 0.13399523496627808, 'timestamp': '2025-09-30 22:18:04.514701', 'step': 4997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:04.546944', 'step': 4997, 'epoch': 1} {'type': 'loss', 'content': 0.14300352334976196, 'timestamp': '2025-09-30 22:18:04.550629', 'step': 4998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:04.582963', 'step': 4998, 'epoch': 1} {'type': 'loss', 'content': 0.24245066940784454, 'timestamp': '2025-09-30 22:18:04.589305', 'step': 4999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:18:04.626573', 'step': 4999, 'epoch': 1} {'type': 'loss', 'content': 0.12363883852958679, 'timestamp': '2025-09-30 22:18:04.652137', 'step': 5000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5000', 'timestamp': '2025-09-30 22:18:09.682785', 'step': 5000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:09.715589', 'step': 5000, 'epoch': 1} {'type': 'loss', 'content': 0.13332673907279968, 'timestamp': '2025-09-30 22:18:09.720061', 'step': 5001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:09.751502', 'step': 5001, 'epoch': 1} {'type': 'loss', 'content': 0.13108979165554047, 'timestamp': '2025-09-30 22:18:09.754381', 'step': 5002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:09.800393', 'step': 5002, 'epoch': 1} {'type': 'loss', 'content': 0.1577255129814148, 'timestamp': '2025-09-30 22:18:09.803158', 'step': 5003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:09.834161', 'step': 5003, 'epoch': 1} {'type': 'loss', 'content': 0.1327463537454605, 'timestamp': '2025-09-30 22:18:09.859776', 'step': 5004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:09.897379', 'step': 5004, 'epoch': 1} {'type': 'loss', 'content': 0.16328097879886627, 'timestamp': '2025-09-30 22:18:09.904484', 'step': 5005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:09.942005', 'step': 5005, 'epoch': 1} {'type': 'loss', 'content': 0.19648565351963043, 'timestamp': '2025-09-30 22:18:09.950729', 'step': 5006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:09.993110', 'step': 5006, 'epoch': 1} {'type': 'loss', 'content': 0.15062189102172852, 'timestamp': '2025-09-30 22:18:09.996146', 'step': 5007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:10.033423', 'step': 5007, 'epoch': 1} {'type': 'loss', 'content': 0.15729251503944397, 'timestamp': '2025-09-30 22:18:10.065329', 'step': 5008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:10.102452', 'step': 5008, 'epoch': 1} {'type': 'loss', 'content': 0.1615738719701767, 'timestamp': '2025-09-30 22:18:10.105328', 'step': 5009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:10.136862', 'step': 5009, 'epoch': 1} {'type': 'loss', 'content': 0.15007035434246063, 'timestamp': '2025-09-30 22:18:10.148400', 'step': 5010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:10.179190', 'step': 5010, 'epoch': 1} {'type': 'loss', 'content': 0.2090633064508438, 'timestamp': '2025-09-30 22:18:10.193868', 'step': 5011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:10.229413', 'step': 5011, 'epoch': 1} {'type': 'loss', 'content': 0.13631287217140198, 'timestamp': '2025-09-30 22:18:10.254456', 'step': 5012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:10.285277', 'step': 5012, 'epoch': 1} {'type': 'loss', 'content': 0.17724797129631042, 'timestamp': '2025-09-30 22:18:10.289320', 'step': 5013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:10.327586', 'step': 5013, 'epoch': 1} {'type': 'loss', 'content': 0.14932802319526672, 'timestamp': '2025-09-30 22:18:10.330626', 'step': 5014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:10.363689', 'step': 5014, 'epoch': 1} {'type': 'loss', 'content': 0.09968488663434982, 'timestamp': '2025-09-30 22:18:10.366772', 'step': 5015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:10.398656', 'step': 5015, 'epoch': 1} {'type': 'loss', 'content': 0.15063753724098206, 'timestamp': '2025-09-30 22:18:10.431437', 'step': 5016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:10.467542', 'step': 5016, 'epoch': 1} {'type': 'loss', 'content': 0.2330971360206604, 'timestamp': '2025-09-30 22:18:10.471745', 'step': 5017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:10.509525', 'step': 5017, 'epoch': 1} {'type': 'loss', 'content': 0.16548864543437958, 'timestamp': '2025-09-30 22:18:10.521582', 'step': 5018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:10.564560', 'step': 5018, 'epoch': 1} {'type': 'loss', 'content': 0.13747914135456085, 'timestamp': '2025-09-30 22:18:10.576142', 'step': 5019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:10.614303', 'step': 5019, 'epoch': 1} {'type': 'loss', 'content': 0.20395617187023163, 'timestamp': '2025-09-30 22:18:10.644798', 'step': 5020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:10.682280', 'step': 5020, 'epoch': 1} {'type': 'loss', 'content': 0.0910135805606842, 'timestamp': '2025-09-30 22:18:10.686243', 'step': 5021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:10.733455', 'step': 5021, 'epoch': 1} {'type': 'loss', 'content': 0.1424071192741394, 'timestamp': '2025-09-30 22:18:10.737877', 'step': 5022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:10.779266', 'step': 5022, 'epoch': 1} {'type': 'loss', 'content': 0.10226727277040482, 'timestamp': '2025-09-30 22:18:10.783048', 'step': 5023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:10.814590', 'step': 5023, 'epoch': 1} {'type': 'loss', 'content': 0.13947971165180206, 'timestamp': '2025-09-30 22:18:10.841183', 'step': 5024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:10.871819', 'step': 5024, 'epoch': 1} {'type': 'loss', 'content': 0.1254681795835495, 'timestamp': '2025-09-30 22:18:10.875612', 'step': 5025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:10.914206', 'step': 5025, 'epoch': 1} {'type': 'loss', 'content': 0.178243488073349, 'timestamp': '2025-09-30 22:18:10.917267', 'step': 5026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:10.957821', 'step': 5026, 'epoch': 1} {'type': 'loss', 'content': 0.22824479639530182, 'timestamp': '2025-09-30 22:18:10.968099', 'step': 5027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:11.009891', 'step': 5027, 'epoch': 1} {'type': 'loss', 'content': 0.19719886779785156, 'timestamp': '2025-09-30 22:18:11.033910', 'step': 5028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:11.065293', 'step': 5028, 'epoch': 1} {'type': 'loss', 'content': 0.1768760085105896, 'timestamp': '2025-09-30 22:18:11.078086', 'step': 5029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:11.118334', 'step': 5029, 'epoch': 1} {'type': 'loss', 'content': 0.1529417335987091, 'timestamp': '2025-09-30 22:18:11.121449', 'step': 5030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:11.160939', 'step': 5030, 'epoch': 1} {'type': 'loss', 'content': 0.1324075162410736, 'timestamp': '2025-09-30 22:18:11.163592', 'step': 5031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.200340', 'step': 5031, 'epoch': 1} {'type': 'loss', 'content': 0.13633129000663757, 'timestamp': '2025-09-30 22:18:11.224849', 'step': 5032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:11.256589', 'step': 5032, 'epoch': 1} {'type': 'loss', 'content': 0.17668618261814117, 'timestamp': '2025-09-30 22:18:11.267666', 'step': 5033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:11.298809', 'step': 5033, 'epoch': 1} {'type': 'loss', 'content': 0.15225961804389954, 'timestamp': '2025-09-30 22:18:11.303555', 'step': 5034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.336104', 'step': 5034, 'epoch': 1} {'type': 'loss', 'content': 0.09025682508945465, 'timestamp': '2025-09-30 22:18:11.339349', 'step': 5035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.370965', 'step': 5035, 'epoch': 1} {'type': 'loss', 'content': 0.304122656583786, 'timestamp': '2025-09-30 22:18:11.395990', 'step': 5036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:11.428795', 'step': 5036, 'epoch': 1} {'type': 'loss', 'content': 0.12692071497440338, 'timestamp': '2025-09-30 22:18:11.440244', 'step': 5037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:11.472557', 'step': 5037, 'epoch': 1} {'type': 'loss', 'content': 0.21022480726242065, 'timestamp': '2025-09-30 22:18:11.483814', 'step': 5038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.523741', 'step': 5038, 'epoch': 1} {'type': 'loss', 'content': 0.26080217957496643, 'timestamp': '2025-09-30 22:18:11.529416', 'step': 5039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.561598', 'step': 5039, 'epoch': 1} {'type': 'loss', 'content': 0.18608763813972473, 'timestamp': '2025-09-30 22:18:11.588775', 'step': 5040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.619776', 'step': 5040, 'epoch': 1} {'type': 'loss', 'content': 0.13738073408603668, 'timestamp': '2025-09-30 22:18:11.623752', 'step': 5041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:11.655067', 'step': 5041, 'epoch': 1} {'type': 'loss', 'content': 0.14580464363098145, 'timestamp': '2025-09-30 22:18:11.660426', 'step': 5042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.705204', 'step': 5042, 'epoch': 1} {'type': 'loss', 'content': 0.20768409967422485, 'timestamp': '2025-09-30 22:18:11.721015', 'step': 5043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:11.754098', 'step': 5043, 'epoch': 1} {'type': 'loss', 'content': 0.19174638390541077, 'timestamp': '2025-09-30 22:18:11.779274', 'step': 5044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:11.810818', 'step': 5044, 'epoch': 1} {'type': 'loss', 'content': 0.20625166594982147, 'timestamp': '2025-09-30 22:18:11.826456', 'step': 5045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:11.865586', 'step': 5045, 'epoch': 1} {'type': 'loss', 'content': 0.12724022567272186, 'timestamp': '2025-09-30 22:18:11.869008', 'step': 5046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:11.904949', 'step': 5046, 'epoch': 1} {'type': 'loss', 'content': 0.2273220270872116, 'timestamp': '2025-09-30 22:18:11.907757', 'step': 5047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:11.952508', 'step': 5047, 'epoch': 1} {'type': 'loss', 'content': 0.13623535633087158, 'timestamp': '2025-09-30 22:18:11.976728', 'step': 5048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:12.011208', 'step': 5048, 'epoch': 1} {'type': 'loss', 'content': 0.13378408551216125, 'timestamp': '2025-09-30 22:18:12.015010', 'step': 5049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.045408', 'step': 5049, 'epoch': 1} {'type': 'loss', 'content': 0.1441831886768341, 'timestamp': '2025-09-30 22:18:12.053885', 'step': 5050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:12.085991', 'step': 5050, 'epoch': 1} {'type': 'loss', 'content': 0.20024611055850983, 'timestamp': '2025-09-30 22:18:12.090031', 'step': 5051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:12.121883', 'step': 5051, 'epoch': 1} {'type': 'loss', 'content': 0.1845104843378067, 'timestamp': '2025-09-30 22:18:12.147960', 'step': 5052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:12.190168', 'step': 5052, 'epoch': 1} {'type': 'loss', 'content': 0.1439531296491623, 'timestamp': '2025-09-30 22:18:12.197562', 'step': 5053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:12.235458', 'step': 5053, 'epoch': 1} {'type': 'loss', 'content': 0.13582181930541992, 'timestamp': '2025-09-30 22:18:12.239899', 'step': 5054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:12.284581', 'step': 5054, 'epoch': 1} {'type': 'loss', 'content': 0.18860220909118652, 'timestamp': '2025-09-30 22:18:12.292236', 'step': 5055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.328163', 'step': 5055, 'epoch': 1} {'type': 'loss', 'content': 0.08209571987390518, 'timestamp': '2025-09-30 22:18:12.356682', 'step': 5056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:12.391345', 'step': 5056, 'epoch': 1} {'type': 'loss', 'content': 0.3097938895225525, 'timestamp': '2025-09-30 22:18:12.394607', 'step': 5057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.428733', 'step': 5057, 'epoch': 1} {'type': 'loss', 'content': 0.10904022306203842, 'timestamp': '2025-09-30 22:18:12.436514', 'step': 5058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:12.473854', 'step': 5058, 'epoch': 1} {'type': 'loss', 'content': 0.1402682363986969, 'timestamp': '2025-09-30 22:18:12.480468', 'step': 5059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:12.514472', 'step': 5059, 'epoch': 1} {'type': 'loss', 'content': 0.07032717764377594, 'timestamp': '2025-09-30 22:18:12.542070', 'step': 5060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:12.575662', 'step': 5060, 'epoch': 1} {'type': 'loss', 'content': 0.09313729405403137, 'timestamp': '2025-09-30 22:18:12.579039', 'step': 5061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.614095', 'step': 5061, 'epoch': 1} {'type': 'loss', 'content': 0.16267500817775726, 'timestamp': '2025-09-30 22:18:12.621685', 'step': 5062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.672258', 'step': 5062, 'epoch': 1} {'type': 'loss', 'content': 0.13414333760738373, 'timestamp': '2025-09-30 22:18:12.676133', 'step': 5063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.711050', 'step': 5063, 'epoch': 1} {'type': 'loss', 'content': 0.228592649102211, 'timestamp': '2025-09-30 22:18:12.738474', 'step': 5064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.774158', 'step': 5064, 'epoch': 1} {'type': 'loss', 'content': 0.1535140872001648, 'timestamp': '2025-09-30 22:18:12.777140', 'step': 5065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:12.808551', 'step': 5065, 'epoch': 1} {'type': 'loss', 'content': 0.22809359431266785, 'timestamp': '2025-09-30 22:18:12.816652', 'step': 5066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:12.851964', 'step': 5066, 'epoch': 1} {'type': 'loss', 'content': 0.22175410389900208, 'timestamp': '2025-09-30 22:18:12.862004', 'step': 5067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:12.899146', 'step': 5067, 'epoch': 1} {'type': 'loss', 'content': 0.13098709285259247, 'timestamp': '2025-09-30 22:18:12.928260', 'step': 5068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:12.960305', 'step': 5068, 'epoch': 1} {'type': 'loss', 'content': 0.16657628118991852, 'timestamp': '2025-09-30 22:18:12.963275', 'step': 5069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.000658', 'step': 5069, 'epoch': 1} {'type': 'loss', 'content': 0.20248015224933624, 'timestamp': '2025-09-30 22:18:13.008116', 'step': 5070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.045556', 'step': 5070, 'epoch': 1} {'type': 'loss', 'content': 0.13193848729133606, 'timestamp': '2025-09-30 22:18:13.050965', 'step': 5071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.087557', 'step': 5071, 'epoch': 1} {'type': 'loss', 'content': 0.05307883396744728, 'timestamp': '2025-09-30 22:18:13.116976', 'step': 5072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.152320', 'step': 5072, 'epoch': 1} {'type': 'loss', 'content': 0.1243002712726593, 'timestamp': '2025-09-30 22:18:13.155230', 'step': 5073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:13.190268', 'step': 5073, 'epoch': 1} {'type': 'loss', 'content': 0.15952694416046143, 'timestamp': '2025-09-30 22:18:13.193728', 'step': 5074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.223771', 'step': 5074, 'epoch': 1} {'type': 'loss', 'content': 0.1278303563594818, 'timestamp': '2025-09-30 22:18:13.229424', 'step': 5075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.266557', 'step': 5075, 'epoch': 1} {'type': 'loss', 'content': 0.1749868094921112, 'timestamp': '2025-09-30 22:18:13.291498', 'step': 5076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.326161', 'step': 5076, 'epoch': 1} {'type': 'loss', 'content': 0.11864295601844788, 'timestamp': '2025-09-30 22:18:13.329779', 'step': 5077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.361560', 'step': 5077, 'epoch': 1} {'type': 'loss', 'content': 0.1329914778470993, 'timestamp': '2025-09-30 22:18:13.369039', 'step': 5078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:13.400168', 'step': 5078, 'epoch': 1} {'type': 'loss', 'content': 0.19426563382148743, 'timestamp': '2025-09-30 22:18:13.415054', 'step': 5079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.454262', 'step': 5079, 'epoch': 1} {'type': 'loss', 'content': 0.10737717151641846, 'timestamp': '2025-09-30 22:18:13.479202', 'step': 5080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:13.515298', 'step': 5080, 'epoch': 1} {'type': 'loss', 'content': 0.16552773118019104, 'timestamp': '2025-09-30 22:18:13.517909', 'step': 5081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.549844', 'step': 5081, 'epoch': 1} {'type': 'loss', 'content': 0.20572291314601898, 'timestamp': '2025-09-30 22:18:13.558223', 'step': 5082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:13.594519', 'step': 5082, 'epoch': 1} {'type': 'loss', 'content': 0.13467785716056824, 'timestamp': '2025-09-30 22:18:13.599901', 'step': 5083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.634105', 'step': 5083, 'epoch': 1} {'type': 'loss', 'content': 0.11735887080430984, 'timestamp': '2025-09-30 22:18:13.660943', 'step': 5084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.691861', 'step': 5084, 'epoch': 1} {'type': 'loss', 'content': 0.1595516949892044, 'timestamp': '2025-09-30 22:18:13.697218', 'step': 5085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.740319', 'step': 5085, 'epoch': 1} {'type': 'loss', 'content': 0.10055841505527496, 'timestamp': '2025-09-30 22:18:13.743080', 'step': 5086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:13.773645', 'step': 5086, 'epoch': 1} {'type': 'loss', 'content': 0.22579720616340637, 'timestamp': '2025-09-30 22:18:13.786684', 'step': 5087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:13.829112', 'step': 5087, 'epoch': 1} {'type': 'loss', 'content': 0.11920777708292007, 'timestamp': '2025-09-30 22:18:13.856410', 'step': 5088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:13.887483', 'step': 5088, 'epoch': 1} {'type': 'loss', 'content': 0.10162407159805298, 'timestamp': '2025-09-30 22:18:13.891247', 'step': 5089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.923331', 'step': 5089, 'epoch': 1} {'type': 'loss', 'content': 0.22616931796073914, 'timestamp': '2025-09-30 22:18:13.931416', 'step': 5090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:13.968803', 'step': 5090, 'epoch': 1} {'type': 'loss', 'content': 0.24699845910072327, 'timestamp': '2025-09-30 22:18:13.972992', 'step': 5091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:14.004004', 'step': 5091, 'epoch': 1} {'type': 'loss', 'content': 0.2524351179599762, 'timestamp': '2025-09-30 22:18:14.035514', 'step': 5092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.072392', 'step': 5092, 'epoch': 1} {'type': 'loss', 'content': 0.09563563764095306, 'timestamp': '2025-09-30 22:18:14.083836', 'step': 5093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.115910', 'step': 5093, 'epoch': 1} {'type': 'loss', 'content': 0.14208199083805084, 'timestamp': '2025-09-30 22:18:14.123338', 'step': 5094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.166541', 'step': 5094, 'epoch': 1} {'type': 'loss', 'content': 0.21007147431373596, 'timestamp': '2025-09-30 22:18:14.171230', 'step': 5095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.205238', 'step': 5095, 'epoch': 1} {'type': 'loss', 'content': 0.21103058755397797, 'timestamp': '2025-09-30 22:18:14.230499', 'step': 5096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.264338', 'step': 5096, 'epoch': 1} {'type': 'loss', 'content': 0.11874250322580338, 'timestamp': '2025-09-30 22:18:14.268787', 'step': 5097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.300931', 'step': 5097, 'epoch': 1} {'type': 'loss', 'content': 0.13056686520576477, 'timestamp': '2025-09-30 22:18:14.304590', 'step': 5098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.336910', 'step': 5098, 'epoch': 1} {'type': 'loss', 'content': 0.17863622307777405, 'timestamp': '2025-09-30 22:18:14.343336', 'step': 5099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:14.380857', 'step': 5099, 'epoch': 1} {'type': 'loss', 'content': 0.1888662725687027, 'timestamp': '2025-09-30 22:18:14.405533', 'step': 5100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:14.435851', 'step': 5100, 'epoch': 1} {'type': 'loss', 'content': 0.1658443659543991, 'timestamp': '2025-09-30 22:18:14.438201', 'step': 5101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.469633', 'step': 5101, 'epoch': 1} {'type': 'loss', 'content': 0.19754812121391296, 'timestamp': '2025-09-30 22:18:14.472505', 'step': 5102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.503359', 'step': 5102, 'epoch': 1} {'type': 'loss', 'content': 0.22977209091186523, 'timestamp': '2025-09-30 22:18:14.507142', 'step': 5103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.540600', 'step': 5103, 'epoch': 1} {'type': 'loss', 'content': 0.22753290832042694, 'timestamp': '2025-09-30 22:18:14.565758', 'step': 5104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.597691', 'step': 5104, 'epoch': 1} {'type': 'loss', 'content': 0.17506462335586548, 'timestamp': '2025-09-30 22:18:14.600343', 'step': 5105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.636868', 'step': 5105, 'epoch': 1} {'type': 'loss', 'content': 0.1244698092341423, 'timestamp': '2025-09-30 22:18:14.640429', 'step': 5106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:14.672856', 'step': 5106, 'epoch': 1} {'type': 'loss', 'content': 0.15542489290237427, 'timestamp': '2025-09-30 22:18:14.676682', 'step': 5107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.708551', 'step': 5107, 'epoch': 1} {'type': 'loss', 'content': 0.15097320079803467, 'timestamp': '2025-09-30 22:18:14.733022', 'step': 5108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.766071', 'step': 5108, 'epoch': 1} {'type': 'loss', 'content': 0.24023516476154327, 'timestamp': '2025-09-30 22:18:14.769182', 'step': 5109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:14.801451', 'step': 5109, 'epoch': 1} {'type': 'loss', 'content': 0.2630220651626587, 'timestamp': '2025-09-30 22:18:14.806258', 'step': 5110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.838465', 'step': 5110, 'epoch': 1} {'type': 'loss', 'content': 0.17080575227737427, 'timestamp': '2025-09-30 22:18:14.841604', 'step': 5111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:14.872564', 'step': 5111, 'epoch': 1} {'type': 'loss', 'content': 0.12167230993509293, 'timestamp': '2025-09-30 22:18:14.896736', 'step': 5112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:14.927081', 'step': 5112, 'epoch': 1} {'type': 'loss', 'content': 0.17405137419700623, 'timestamp': '2025-09-30 22:18:14.930738', 'step': 5113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:14.962677', 'step': 5113, 'epoch': 1} {'type': 'loss', 'content': 0.26315581798553467, 'timestamp': '2025-09-30 22:18:14.967116', 'step': 5114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:15.002568', 'step': 5114, 'epoch': 1} {'type': 'loss', 'content': 0.1444457769393921, 'timestamp': '2025-09-30 22:18:15.004856', 'step': 5115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:15.039411', 'step': 5115, 'epoch': 1} {'type': 'loss', 'content': 0.18072953820228577, 'timestamp': '2025-09-30 22:18:15.063419', 'step': 5116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:15.104335', 'step': 5116, 'epoch': 1} {'type': 'loss', 'content': 0.130597323179245, 'timestamp': '2025-09-30 22:18:15.106988', 'step': 5117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:15.139302', 'step': 5117, 'epoch': 1} {'type': 'loss', 'content': 0.2417924553155899, 'timestamp': '2025-09-30 22:18:15.142884', 'step': 5118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:15.173689', 'step': 5118, 'epoch': 1} {'type': 'loss', 'content': 0.2039923220872879, 'timestamp': '2025-09-30 22:18:15.180076', 'step': 5119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:15.212840', 'step': 5119, 'epoch': 1} {'type': 'loss', 'content': 0.2097506821155548, 'timestamp': '2025-09-30 22:18:15.239038', 'step': 5120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:15.271583', 'step': 5120, 'epoch': 1} {'type': 'loss', 'content': 0.15309898555278778, 'timestamp': '2025-09-30 22:18:15.274587', 'step': 5121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:15.305922', 'step': 5121, 'epoch': 1} {'type': 'loss', 'content': 0.21961279213428497, 'timestamp': '2025-09-30 22:18:15.308339', 'step': 5122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:15.338433', 'step': 5122, 'epoch': 1} {'type': 'loss', 'content': 0.11868871748447418, 'timestamp': '2025-09-30 22:18:15.340905', 'step': 5123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:15.372882', 'step': 5123, 'epoch': 1} {'type': 'loss', 'content': 0.11362197995185852, 'timestamp': '2025-09-30 22:18:15.399338', 'step': 5124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:15.430176', 'step': 5124, 'epoch': 1} {'type': 'loss', 'content': 0.1482965499162674, 'timestamp': '2025-09-30 22:18:15.433660', 'step': 5125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:15.465027', 'step': 5125, 'epoch': 1} {'type': 'loss', 'content': 0.16714856028556824, 'timestamp': '2025-09-30 22:18:15.469827', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:18:23.195350', 'step': 5126, 'epoch': 1} {'type': 'pplx', 'content': 9518.29188021414, 'timestamp': '2025-09-30 22:18:23.202702', 'step': 5126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:23.233625', 'step': 5126, 'epoch': 1} {'type': 'loss', 'content': 0.09716784954071045, 'timestamp': '2025-09-30 22:18:23.235943', 'step': 5127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.268590', 'step': 5127, 'epoch': 1} {'type': 'loss', 'content': 0.24679891765117645, 'timestamp': '2025-09-30 22:18:23.295369', 'step': 5128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.330465', 'step': 5128, 'epoch': 1} {'type': 'loss', 'content': 0.10968150943517685, 'timestamp': '2025-09-30 22:18:23.334927', 'step': 5129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:23.367870', 'step': 5129, 'epoch': 1} {'type': 'loss', 'content': 0.1319594383239746, 'timestamp': '2025-09-30 22:18:23.370291', 'step': 5130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.400726', 'step': 5130, 'epoch': 1} {'type': 'loss', 'content': 0.11071795970201492, 'timestamp': '2025-09-30 22:18:23.405791', 'step': 5131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.437291', 'step': 5131, 'epoch': 1} {'type': 'loss', 'content': 0.11316284537315369, 'timestamp': '2025-09-30 22:18:23.464908', 'step': 5132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:23.497190', 'step': 5132, 'epoch': 1} {'type': 'loss', 'content': 0.1809159368276596, 'timestamp': '2025-09-30 22:18:23.502499', 'step': 5133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:23.534818', 'step': 5133, 'epoch': 1} {'type': 'loss', 'content': 0.1268143355846405, 'timestamp': '2025-09-30 22:18:23.537508', 'step': 5134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:23.568740', 'step': 5134, 'epoch': 1} {'type': 'loss', 'content': 0.1602456122636795, 'timestamp': '2025-09-30 22:18:23.570995', 'step': 5135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.601331', 'step': 5135, 'epoch': 1} {'type': 'loss', 'content': 0.14275528490543365, 'timestamp': '2025-09-30 22:18:23.626365', 'step': 5136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.657729', 'step': 5136, 'epoch': 1} {'type': 'loss', 'content': 0.11764905601739883, 'timestamp': '2025-09-30 22:18:23.660247', 'step': 5137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:23.691877', 'step': 5137, 'epoch': 1} {'type': 'loss', 'content': 0.24197742342948914, 'timestamp': '2025-09-30 22:18:23.695344', 'step': 5138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:23.726628', 'step': 5138, 'epoch': 1} {'type': 'loss', 'content': 0.21726688742637634, 'timestamp': '2025-09-30 22:18:23.728789', 'step': 5139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:23.759630', 'step': 5139, 'epoch': 1} {'type': 'loss', 'content': 0.13526520133018494, 'timestamp': '2025-09-30 22:18:23.784179', 'step': 5140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.814910', 'step': 5140, 'epoch': 1} {'type': 'loss', 'content': 0.14850442111492157, 'timestamp': '2025-09-30 22:18:23.817222', 'step': 5141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:23.850103', 'step': 5141, 'epoch': 1} {'type': 'loss', 'content': 0.13837048411369324, 'timestamp': '2025-09-30 22:18:23.855946', 'step': 5142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:23.901418', 'step': 5142, 'epoch': 1} {'type': 'loss', 'content': 0.11796829104423523, 'timestamp': '2025-09-30 22:18:23.905154', 'step': 5143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.936520', 'step': 5143, 'epoch': 1} {'type': 'loss', 'content': 0.1379266232252121, 'timestamp': '2025-09-30 22:18:23.961122', 'step': 5144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:23.991709', 'step': 5144, 'epoch': 1} {'type': 'loss', 'content': 0.12500272691249847, 'timestamp': '2025-09-30 22:18:23.995045', 'step': 5145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:24.026676', 'step': 5145, 'epoch': 1} {'type': 'loss', 'content': 0.11542858928442001, 'timestamp': '2025-09-30 22:18:24.030228', 'step': 5146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:24.060854', 'step': 5146, 'epoch': 1} {'type': 'loss', 'content': 0.1181798204779625, 'timestamp': '2025-09-30 22:18:24.064555', 'step': 5147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:24.099082', 'step': 5147, 'epoch': 1} {'type': 'loss', 'content': 0.19528347253799438, 'timestamp': '2025-09-30 22:18:24.124034', 'step': 5148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:24.155760', 'step': 5148, 'epoch': 1} {'type': 'loss', 'content': 0.040278032422065735, 'timestamp': '2025-09-30 22:18:24.158072', 'step': 5149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.190727', 'step': 5149, 'epoch': 1} {'type': 'loss', 'content': 0.2123391479253769, 'timestamp': '2025-09-30 22:18:24.193784', 'step': 5150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:24.225653', 'step': 5150, 'epoch': 1} {'type': 'loss', 'content': 0.08070262521505356, 'timestamp': '2025-09-30 22:18:24.229931', 'step': 5151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:24.262442', 'step': 5151, 'epoch': 1} {'type': 'loss', 'content': 0.13738226890563965, 'timestamp': '2025-09-30 22:18:24.288870', 'step': 5152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:24.319442', 'step': 5152, 'epoch': 1} {'type': 'loss', 'content': 0.1613549292087555, 'timestamp': '2025-09-30 22:18:24.323170', 'step': 5153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:24.354963', 'step': 5153, 'epoch': 1} {'type': 'loss', 'content': 0.14641894400119781, 'timestamp': '2025-09-30 22:18:24.357335', 'step': 5154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:24.390688', 'step': 5154, 'epoch': 1} {'type': 'loss', 'content': 0.1402103155851364, 'timestamp': '2025-09-30 22:18:24.393254', 'step': 5155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:24.427201', 'step': 5155, 'epoch': 1} {'type': 'loss', 'content': 0.10937175154685974, 'timestamp': '2025-09-30 22:18:24.451013', 'step': 5156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.483299', 'step': 5156, 'epoch': 1} {'type': 'loss', 'content': 0.12171164900064468, 'timestamp': '2025-09-30 22:18:24.488385', 'step': 5157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.521807', 'step': 5157, 'epoch': 1} {'type': 'loss', 'content': 0.13427814841270447, 'timestamp': '2025-09-30 22:18:24.526331', 'step': 5158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.562786', 'step': 5158, 'epoch': 1} {'type': 'loss', 'content': 0.14646513760089874, 'timestamp': '2025-09-30 22:18:24.566980', 'step': 5159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.599721', 'step': 5159, 'epoch': 1} {'type': 'loss', 'content': 0.14821672439575195, 'timestamp': '2025-09-30 22:18:24.625360', 'step': 5160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.656168', 'step': 5160, 'epoch': 1} {'type': 'loss', 'content': 0.11692763864994049, 'timestamp': '2025-09-30 22:18:24.662658', 'step': 5161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.696034', 'step': 5161, 'epoch': 1} {'type': 'loss', 'content': 0.1785074770450592, 'timestamp': '2025-09-30 22:18:24.698491', 'step': 5162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:24.729368', 'step': 5162, 'epoch': 1} {'type': 'loss', 'content': 0.21882547438144684, 'timestamp': '2025-09-30 22:18:24.734923', 'step': 5163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:24.769733', 'step': 5163, 'epoch': 1} {'type': 'loss', 'content': 0.17799048125743866, 'timestamp': '2025-09-30 22:18:24.793869', 'step': 5164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:24.827005', 'step': 5164, 'epoch': 1} {'type': 'loss', 'content': 0.1743350476026535, 'timestamp': '2025-09-30 22:18:24.831368', 'step': 5165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:24.864157', 'step': 5165, 'epoch': 1} {'type': 'loss', 'content': 0.11384202539920807, 'timestamp': '2025-09-30 22:18:24.866875', 'step': 5166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:24.897709', 'step': 5166, 'epoch': 1} {'type': 'loss', 'content': 0.1938851922750473, 'timestamp': '2025-09-30 22:18:24.899852', 'step': 5167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:24.930010', 'step': 5167, 'epoch': 1} {'type': 'loss', 'content': 0.10489805042743683, 'timestamp': '2025-09-30 22:18:24.953686', 'step': 5168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:24.986154', 'step': 5168, 'epoch': 1} {'type': 'loss', 'content': 0.23085838556289673, 'timestamp': '2025-09-30 22:18:24.991508', 'step': 5169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.023959', 'step': 5169, 'epoch': 1} {'type': 'loss', 'content': 0.19146469235420227, 'timestamp': '2025-09-30 22:18:25.027058', 'step': 5170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:25.059081', 'step': 5170, 'epoch': 1} {'type': 'loss', 'content': 0.20485031604766846, 'timestamp': '2025-09-30 22:18:25.067026', 'step': 5171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.101869', 'step': 5171, 'epoch': 1} {'type': 'loss', 'content': 0.17124707996845245, 'timestamp': '2025-09-30 22:18:25.127743', 'step': 5172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.158015', 'step': 5172, 'epoch': 1} {'type': 'loss', 'content': 0.12105870991945267, 'timestamp': '2025-09-30 22:18:25.164343', 'step': 5173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.207424', 'step': 5173, 'epoch': 1} {'type': 'loss', 'content': 0.1492868810892105, 'timestamp': '2025-09-30 22:18:25.211071', 'step': 5174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:25.244171', 'step': 5174, 'epoch': 1} {'type': 'loss', 'content': 0.19287143647670746, 'timestamp': '2025-09-30 22:18:25.247722', 'step': 5175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.282456', 'step': 5175, 'epoch': 1} {'type': 'loss', 'content': 0.10359067469835281, 'timestamp': '2025-09-30 22:18:25.313728', 'step': 5176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:18:25.348221', 'step': 5176, 'epoch': 1} {'type': 'loss', 'content': 0.1332247406244278, 'timestamp': '2025-09-30 22:18:25.351647', 'step': 5177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:25.384602', 'step': 5177, 'epoch': 1} {'type': 'loss', 'content': 0.13584250211715698, 'timestamp': '2025-09-30 22:18:25.388123', 'step': 5178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:25.420275', 'step': 5178, 'epoch': 1} {'type': 'loss', 'content': 0.14374206960201263, 'timestamp': '2025-09-30 22:18:25.424672', 'step': 5179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:25.458765', 'step': 5179, 'epoch': 1} {'type': 'loss', 'content': 0.2207968533039093, 'timestamp': '2025-09-30 22:18:25.485084', 'step': 5180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.517343', 'step': 5180, 'epoch': 1} {'type': 'loss', 'content': 0.1677352488040924, 'timestamp': '2025-09-30 22:18:25.522339', 'step': 5181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:25.554133', 'step': 5181, 'epoch': 1} {'type': 'loss', 'content': 0.16622422635555267, 'timestamp': '2025-09-30 22:18:25.557660', 'step': 5182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.587909', 'step': 5182, 'epoch': 1} {'type': 'loss', 'content': 0.14877372980117798, 'timestamp': '2025-09-30 22:18:25.590354', 'step': 5183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.628940', 'step': 5183, 'epoch': 1} {'type': 'loss', 'content': 0.19171099364757538, 'timestamp': '2025-09-30 22:18:25.653166', 'step': 5184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.684208', 'step': 5184, 'epoch': 1} {'type': 'loss', 'content': 0.20428423583507538, 'timestamp': '2025-09-30 22:18:25.688196', 'step': 5185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.731606', 'step': 5185, 'epoch': 1} {'type': 'loss', 'content': 0.1743352860212326, 'timestamp': '2025-09-30 22:18:25.735918', 'step': 5186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.768297', 'step': 5186, 'epoch': 1} {'type': 'loss', 'content': 0.13630355894565582, 'timestamp': '2025-09-30 22:18:25.772428', 'step': 5187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.805165', 'step': 5187, 'epoch': 1} {'type': 'loss', 'content': 0.23077921569347382, 'timestamp': '2025-09-30 22:18:25.830538', 'step': 5188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.862790', 'step': 5188, 'epoch': 1} {'type': 'loss', 'content': 0.1949584186077118, 'timestamp': '2025-09-30 22:18:25.865747', 'step': 5189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:25.897552', 'step': 5189, 'epoch': 1} {'type': 'loss', 'content': 0.13627289235591888, 'timestamp': '2025-09-30 22:18:25.902281', 'step': 5190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:25.940279', 'step': 5190, 'epoch': 1} {'type': 'loss', 'content': 0.12142874300479889, 'timestamp': '2025-09-30 22:18:25.944050', 'step': 5191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:25.983674', 'step': 5191, 'epoch': 1} {'type': 'loss', 'content': 0.18949997425079346, 'timestamp': '2025-09-30 22:18:26.009965', 'step': 5192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:26.043673', 'step': 5192, 'epoch': 1} {'type': 'loss', 'content': 0.1423400640487671, 'timestamp': '2025-09-30 22:18:26.047290', 'step': 5193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:26.079419', 'step': 5193, 'epoch': 1} {'type': 'loss', 'content': 0.18855735659599304, 'timestamp': '2025-09-30 22:18:26.081712', 'step': 5194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:26.112163', 'step': 5194, 'epoch': 1} {'type': 'loss', 'content': 0.10873803496360779, 'timestamp': '2025-09-30 22:18:26.115557', 'step': 5195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:26.147622', 'step': 5195, 'epoch': 1} {'type': 'loss', 'content': 0.13620448112487793, 'timestamp': '2025-09-30 22:18:26.173926', 'step': 5196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:26.206783', 'step': 5196, 'epoch': 1} {'type': 'loss', 'content': 0.2897462844848633, 'timestamp': '2025-09-30 22:18:26.210263', 'step': 5197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:26.242867', 'step': 5197, 'epoch': 1} {'type': 'loss', 'content': 0.22376742959022522, 'timestamp': '2025-09-30 22:18:26.247978', 'step': 5198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:26.281119', 'step': 5198, 'epoch': 1} {'type': 'loss', 'content': 0.13717679679393768, 'timestamp': '2025-09-30 22:18:26.284607', 'step': 5199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.316437', 'step': 5199, 'epoch': 1} {'type': 'loss', 'content': 0.19797617197036743, 'timestamp': '2025-09-30 22:18:26.341801', 'step': 5200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:26.371664', 'step': 5200, 'epoch': 1} {'type': 'loss', 'content': 0.2541492283344269, 'timestamp': '2025-09-30 22:18:26.376017', 'step': 5201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.407645', 'step': 5201, 'epoch': 1} {'type': 'loss', 'content': 0.1843733936548233, 'timestamp': '2025-09-30 22:18:26.414816', 'step': 5202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:26.449768', 'step': 5202, 'epoch': 1} {'type': 'loss', 'content': 0.13780224323272705, 'timestamp': '2025-09-30 22:18:26.455930', 'step': 5203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:26.490552', 'step': 5203, 'epoch': 1} {'type': 'loss', 'content': 0.11791719496250153, 'timestamp': '2025-09-30 22:18:26.514825', 'step': 5204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.545771', 'step': 5204, 'epoch': 1} {'type': 'loss', 'content': 0.11775169521570206, 'timestamp': '2025-09-30 22:18:26.548503', 'step': 5205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:26.580800', 'step': 5205, 'epoch': 1} {'type': 'loss', 'content': 0.18140959739685059, 'timestamp': '2025-09-30 22:18:26.583837', 'step': 5206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:26.614692', 'step': 5206, 'epoch': 1} {'type': 'loss', 'content': 0.1456497311592102, 'timestamp': '2025-09-30 22:18:26.617720', 'step': 5207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:26.648583', 'step': 5207, 'epoch': 1} {'type': 'loss', 'content': 0.18373404443264008, 'timestamp': '2025-09-30 22:18:26.673236', 'step': 5208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.707036', 'step': 5208, 'epoch': 1} {'type': 'loss', 'content': 0.18164995312690735, 'timestamp': '2025-09-30 22:18:26.710788', 'step': 5209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.741801', 'step': 5209, 'epoch': 1} {'type': 'loss', 'content': 0.1846400499343872, 'timestamp': '2025-09-30 22:18:26.744482', 'step': 5210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:26.775100', 'step': 5210, 'epoch': 1} {'type': 'loss', 'content': 0.14170710742473602, 'timestamp': '2025-09-30 22:18:26.780242', 'step': 5211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.812696', 'step': 5211, 'epoch': 1} {'type': 'loss', 'content': 0.16410382091999054, 'timestamp': '2025-09-30 22:18:26.838896', 'step': 5212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.871837', 'step': 5212, 'epoch': 1} {'type': 'loss', 'content': 0.15156693756580353, 'timestamp': '2025-09-30 22:18:26.874532', 'step': 5213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:26.908884', 'step': 5213, 'epoch': 1} {'type': 'loss', 'content': 0.2071993350982666, 'timestamp': '2025-09-30 22:18:26.915892', 'step': 5214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:26.950534', 'step': 5214, 'epoch': 1} {'type': 'loss', 'content': 0.1265469193458557, 'timestamp': '2025-09-30 22:18:26.956039', 'step': 5215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:26.989149', 'step': 5215, 'epoch': 1} {'type': 'loss', 'content': 0.13733932375907898, 'timestamp': '2025-09-30 22:18:27.015723', 'step': 5216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:27.046669', 'step': 5216, 'epoch': 1} {'type': 'loss', 'content': 0.1548881232738495, 'timestamp': '2025-09-30 22:18:27.048911', 'step': 5217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.084168', 'step': 5217, 'epoch': 1} {'type': 'loss', 'content': 0.1605515480041504, 'timestamp': '2025-09-30 22:18:27.090574', 'step': 5218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:27.125008', 'step': 5218, 'epoch': 1} {'type': 'loss', 'content': 0.11581659317016602, 'timestamp': '2025-09-30 22:18:27.127419', 'step': 5219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.164358', 'step': 5219, 'epoch': 1} {'type': 'loss', 'content': 0.17663583159446716, 'timestamp': '2025-09-30 22:18:27.190207', 'step': 5220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:27.224494', 'step': 5220, 'epoch': 1} {'type': 'loss', 'content': 0.2277403324842453, 'timestamp': '2025-09-30 22:18:27.229015', 'step': 5221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.260052', 'step': 5221, 'epoch': 1} {'type': 'loss', 'content': 0.28339844942092896, 'timestamp': '2025-09-30 22:18:27.264333', 'step': 5222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.294866', 'step': 5222, 'epoch': 1} {'type': 'loss', 'content': 0.09144169837236404, 'timestamp': '2025-09-30 22:18:27.297231', 'step': 5223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.328146', 'step': 5223, 'epoch': 1} {'type': 'loss', 'content': 0.1461113542318344, 'timestamp': '2025-09-30 22:18:27.353668', 'step': 5224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.389270', 'step': 5224, 'epoch': 1} {'type': 'loss', 'content': 0.12579986453056335, 'timestamp': '2025-09-30 22:18:27.392019', 'step': 5225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.424364', 'step': 5225, 'epoch': 1} {'type': 'loss', 'content': 0.11601617932319641, 'timestamp': '2025-09-30 22:18:27.427196', 'step': 5226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.465289', 'step': 5226, 'epoch': 1} {'type': 'loss', 'content': 0.15916205942630768, 'timestamp': '2025-09-30 22:18:27.468193', 'step': 5227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:27.499344', 'step': 5227, 'epoch': 1} {'type': 'loss', 'content': 0.08897855132818222, 'timestamp': '2025-09-30 22:18:27.523566', 'step': 5228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.556191', 'step': 5228, 'epoch': 1} {'type': 'loss', 'content': 0.18676058948040009, 'timestamp': '2025-09-30 22:18:27.558749', 'step': 5229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.592075', 'step': 5229, 'epoch': 1} {'type': 'loss', 'content': 0.18532681465148926, 'timestamp': '2025-09-30 22:18:27.595815', 'step': 5230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:27.627454', 'step': 5230, 'epoch': 1} {'type': 'loss', 'content': 0.074726901948452, 'timestamp': '2025-09-30 22:18:27.630412', 'step': 5231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:27.667276', 'step': 5231, 'epoch': 1} {'type': 'loss', 'content': 0.192801371216774, 'timestamp': '2025-09-30 22:18:27.695986', 'step': 5232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.727380', 'step': 5232, 'epoch': 1} {'type': 'loss', 'content': 0.13830603659152985, 'timestamp': '2025-09-30 22:18:27.731707', 'step': 5233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.766945', 'step': 5233, 'epoch': 1} {'type': 'loss', 'content': 0.25944283604621887, 'timestamp': '2025-09-30 22:18:27.771520', 'step': 5234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.805017', 'step': 5234, 'epoch': 1} {'type': 'loss', 'content': 0.06890556961297989, 'timestamp': '2025-09-30 22:18:27.810908', 'step': 5235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:27.845097', 'step': 5235, 'epoch': 1} {'type': 'loss', 'content': 0.14307814836502075, 'timestamp': '2025-09-30 22:18:27.872116', 'step': 5236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:27.906809', 'step': 5236, 'epoch': 1} {'type': 'loss', 'content': 0.11289079487323761, 'timestamp': '2025-09-30 22:18:27.909879', 'step': 5237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:27.941963', 'step': 5237, 'epoch': 1} {'type': 'loss', 'content': 0.19951042532920837, 'timestamp': '2025-09-30 22:18:27.946997', 'step': 5238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:27.984227', 'step': 5238, 'epoch': 1} {'type': 'loss', 'content': 0.12116371840238571, 'timestamp': '2025-09-30 22:18:27.990787', 'step': 5239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.021531', 'step': 5239, 'epoch': 1} {'type': 'loss', 'content': 0.20638196170330048, 'timestamp': '2025-09-30 22:18:28.050701', 'step': 5240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.083639', 'step': 5240, 'epoch': 1} {'type': 'loss', 'content': 0.14296986162662506, 'timestamp': '2025-09-30 22:18:28.088388', 'step': 5241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:28.120626', 'step': 5241, 'epoch': 1} {'type': 'loss', 'content': 0.15424732863903046, 'timestamp': '2025-09-30 22:18:28.126946', 'step': 5242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:28.160207', 'step': 5242, 'epoch': 1} {'type': 'loss', 'content': 0.122782401740551, 'timestamp': '2025-09-30 22:18:28.162981', 'step': 5243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:28.194806', 'step': 5243, 'epoch': 1} {'type': 'loss', 'content': 0.14807544648647308, 'timestamp': '2025-09-30 22:18:28.218695', 'step': 5244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:28.248920', 'step': 5244, 'epoch': 1} {'type': 'loss', 'content': 0.07129893451929092, 'timestamp': '2025-09-30 22:18:28.251590', 'step': 5245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.285425', 'step': 5245, 'epoch': 1} {'type': 'loss', 'content': 0.13299348950386047, 'timestamp': '2025-09-30 22:18:28.290241', 'step': 5246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:28.322065', 'step': 5246, 'epoch': 1} {'type': 'loss', 'content': 0.19031484425067902, 'timestamp': '2025-09-30 22:18:28.325813', 'step': 5247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:28.361056', 'step': 5247, 'epoch': 1} {'type': 'loss', 'content': 0.13177359104156494, 'timestamp': '2025-09-30 22:18:28.391498', 'step': 5248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.428504', 'step': 5248, 'epoch': 1} {'type': 'loss', 'content': 0.11936169862747192, 'timestamp': '2025-09-30 22:18:28.431482', 'step': 5249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:28.470405', 'step': 5249, 'epoch': 1} {'type': 'loss', 'content': 0.1848946213722229, 'timestamp': '2025-09-30 22:18:28.482003', 'step': 5250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.519103', 'step': 5250, 'epoch': 1} {'type': 'loss', 'content': 0.09876440465450287, 'timestamp': '2025-09-30 22:18:28.524115', 'step': 5251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:28.555779', 'step': 5251, 'epoch': 1} {'type': 'loss', 'content': 0.14016132056713104, 'timestamp': '2025-09-30 22:18:28.582396', 'step': 5252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.612801', 'step': 5252, 'epoch': 1} {'type': 'loss', 'content': 0.18988017737865448, 'timestamp': '2025-09-30 22:18:28.625055', 'step': 5253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.666228', 'step': 5253, 'epoch': 1} {'type': 'loss', 'content': 0.1950080394744873, 'timestamp': '2025-09-30 22:18:28.672103', 'step': 5254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:28.705822', 'step': 5254, 'epoch': 1} {'type': 'loss', 'content': 0.18534159660339355, 'timestamp': '2025-09-30 22:18:28.707921', 'step': 5255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.741701', 'step': 5255, 'epoch': 1} {'type': 'loss', 'content': 0.15825048089027405, 'timestamp': '2025-09-30 22:18:28.770262', 'step': 5256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:28.802796', 'step': 5256, 'epoch': 1} {'type': 'loss', 'content': 0.2797068655490875, 'timestamp': '2025-09-30 22:18:28.805730', 'step': 5257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:28.840640', 'step': 5257, 'epoch': 1} {'type': 'loss', 'content': 0.1404433399438858, 'timestamp': '2025-09-30 22:18:28.843556', 'step': 5258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:28.878792', 'step': 5258, 'epoch': 1} {'type': 'loss', 'content': 0.05859432741999626, 'timestamp': '2025-09-30 22:18:28.881985', 'step': 5259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:28.913206', 'step': 5259, 'epoch': 1} {'type': 'loss', 'content': 0.11439299583435059, 'timestamp': '2025-09-30 22:18:28.937208', 'step': 5260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:28.970147', 'step': 5260, 'epoch': 1} {'type': 'loss', 'content': 0.09619073569774628, 'timestamp': '2025-09-30 22:18:28.973048', 'step': 5261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.007755', 'step': 5261, 'epoch': 1} {'type': 'loss', 'content': 0.08630059659481049, 'timestamp': '2025-09-30 22:18:29.012058', 'step': 5262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:29.046757', 'step': 5262, 'epoch': 1} {'type': 'loss', 'content': 0.07188212126493454, 'timestamp': '2025-09-30 22:18:29.051302', 'step': 5263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:29.083933', 'step': 5263, 'epoch': 1} {'type': 'loss', 'content': 0.13366881012916565, 'timestamp': '2025-09-30 22:18:29.109387', 'step': 5264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:29.143381', 'step': 5264, 'epoch': 1} {'type': 'loss', 'content': 0.2679452896118164, 'timestamp': '2025-09-30 22:18:29.145654', 'step': 5265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:29.178472', 'step': 5265, 'epoch': 1} {'type': 'loss', 'content': 0.2044639140367508, 'timestamp': '2025-09-30 22:18:29.181233', 'step': 5266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:29.211960', 'step': 5266, 'epoch': 1} {'type': 'loss', 'content': 0.15770362317562103, 'timestamp': '2025-09-30 22:18:29.215546', 'step': 5267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.246999', 'step': 5267, 'epoch': 1} {'type': 'loss', 'content': 0.11800277978181839, 'timestamp': '2025-09-30 22:18:29.272444', 'step': 5268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:29.304463', 'step': 5268, 'epoch': 1} {'type': 'loss', 'content': 0.1919143944978714, 'timestamp': '2025-09-30 22:18:29.307994', 'step': 5269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.339287', 'step': 5269, 'epoch': 1} {'type': 'loss', 'content': 0.13564567267894745, 'timestamp': '2025-09-30 22:18:29.347406', 'step': 5270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.385002', 'step': 5270, 'epoch': 1} {'type': 'loss', 'content': 0.20235437154769897, 'timestamp': '2025-09-30 22:18:29.389343', 'step': 5271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.420646', 'step': 5271, 'epoch': 1} {'type': 'loss', 'content': 0.09805383533239365, 'timestamp': '2025-09-30 22:18:29.445128', 'step': 5272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:29.481191', 'step': 5272, 'epoch': 1} {'type': 'loss', 'content': 0.1803862303495407, 'timestamp': '2025-09-30 22:18:29.483893', 'step': 5273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:29.518166', 'step': 5273, 'epoch': 1} {'type': 'loss', 'content': 0.2516098916530609, 'timestamp': '2025-09-30 22:18:29.528912', 'step': 5274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:29.567600', 'step': 5274, 'epoch': 1} {'type': 'loss', 'content': 0.23766572773456573, 'timestamp': '2025-09-30 22:18:29.574623', 'step': 5275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:29.611159', 'step': 5275, 'epoch': 1} {'type': 'loss', 'content': 0.1372710019350052, 'timestamp': '2025-09-30 22:18:29.635974', 'step': 5276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:29.669878', 'step': 5276, 'epoch': 1} {'type': 'loss', 'content': 0.10610227286815643, 'timestamp': '2025-09-30 22:18:29.673045', 'step': 5277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.708163', 'step': 5277, 'epoch': 1} {'type': 'loss', 'content': 0.14703382551670074, 'timestamp': '2025-09-30 22:18:29.718573', 'step': 5278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:29.756912', 'step': 5278, 'epoch': 1} {'type': 'loss', 'content': 0.30155429244041443, 'timestamp': '2025-09-30 22:18:29.766052', 'step': 5279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.796763', 'step': 5279, 'epoch': 1} {'type': 'loss', 'content': 0.11799825727939606, 'timestamp': '2025-09-30 22:18:29.821741', 'step': 5280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:29.853210', 'step': 5280, 'epoch': 1} {'type': 'loss', 'content': 0.18203862011432648, 'timestamp': '2025-09-30 22:18:29.862063', 'step': 5281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:29.898430', 'step': 5281, 'epoch': 1} {'type': 'loss', 'content': 0.17150159180164337, 'timestamp': '2025-09-30 22:18:29.901028', 'step': 5282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:29.939060', 'step': 5282, 'epoch': 1} {'type': 'loss', 'content': 0.13719795644283295, 'timestamp': '2025-09-30 22:18:29.941713', 'step': 5283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:29.981043', 'step': 5283, 'epoch': 1} {'type': 'loss', 'content': 0.12144965678453445, 'timestamp': '2025-09-30 22:18:30.013976', 'step': 5284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.051306', 'step': 5284, 'epoch': 1} {'type': 'loss', 'content': 0.18508730828762054, 'timestamp': '2025-09-30 22:18:30.060469', 'step': 5285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:30.092164', 'step': 5285, 'epoch': 1} {'type': 'loss', 'content': 0.26983532309532166, 'timestamp': '2025-09-30 22:18:30.099233', 'step': 5286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:30.135678', 'step': 5286, 'epoch': 1} {'type': 'loss', 'content': 0.1307196170091629, 'timestamp': '2025-09-30 22:18:30.144704', 'step': 5287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.178734', 'step': 5287, 'epoch': 1} {'type': 'loss', 'content': 0.10916502773761749, 'timestamp': '2025-09-30 22:18:30.206823', 'step': 5288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:30.240282', 'step': 5288, 'epoch': 1} {'type': 'loss', 'content': 0.24948538839817047, 'timestamp': '2025-09-30 22:18:30.242695', 'step': 5289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:30.273621', 'step': 5289, 'epoch': 1} {'type': 'loss', 'content': 0.12293405085802078, 'timestamp': '2025-09-30 22:18:30.280702', 'step': 5290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:30.318516', 'step': 5290, 'epoch': 1} {'type': 'loss', 'content': 0.23031310737133026, 'timestamp': '2025-09-30 22:18:30.327589', 'step': 5291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:30.368946', 'step': 5291, 'epoch': 1} {'type': 'loss', 'content': 0.17818467319011688, 'timestamp': '2025-09-30 22:18:30.395391', 'step': 5292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:30.428091', 'step': 5292, 'epoch': 1} {'type': 'loss', 'content': 0.22259096801280975, 'timestamp': '2025-09-30 22:18:30.432502', 'step': 5293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.465664', 'step': 5293, 'epoch': 1} {'type': 'loss', 'content': 0.14777366816997528, 'timestamp': '2025-09-30 22:18:30.471232', 'step': 5294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:30.507389', 'step': 5294, 'epoch': 1} {'type': 'loss', 'content': 0.10262386500835419, 'timestamp': '2025-09-30 22:18:30.511788', 'step': 5295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:30.542909', 'step': 5295, 'epoch': 1} {'type': 'loss', 'content': 0.11923589557409286, 'timestamp': '2025-09-30 22:18:30.566688', 'step': 5296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.600227', 'step': 5296, 'epoch': 1} {'type': 'loss', 'content': 0.19439126551151276, 'timestamp': '2025-09-30 22:18:30.605468', 'step': 5297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.645107', 'step': 5297, 'epoch': 1} {'type': 'loss', 'content': 0.17008522152900696, 'timestamp': '2025-09-30 22:18:30.648796', 'step': 5298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:30.685428', 'step': 5298, 'epoch': 1} {'type': 'loss', 'content': 0.16444101929664612, 'timestamp': '2025-09-30 22:18:30.693268', 'step': 5299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:30.729282', 'step': 5299, 'epoch': 1} {'type': 'loss', 'content': 0.1421152502298355, 'timestamp': '2025-09-30 22:18:30.756677', 'step': 5300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.790829', 'step': 5300, 'epoch': 1} {'type': 'loss', 'content': 0.13247759640216827, 'timestamp': '2025-09-30 22:18:30.794750', 'step': 5301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:30.826411', 'step': 5301, 'epoch': 1} {'type': 'loss', 'content': 0.18459399044513702, 'timestamp': '2025-09-30 22:18:30.832286', 'step': 5302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.872753', 'step': 5302, 'epoch': 1} {'type': 'loss', 'content': 0.2404535859823227, 'timestamp': '2025-09-30 22:18:30.880236', 'step': 5303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.914568', 'step': 5303, 'epoch': 1} {'type': 'loss', 'content': 0.13173404335975647, 'timestamp': '2025-09-30 22:18:30.938881', 'step': 5304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:30.978675', 'step': 5304, 'epoch': 1} {'type': 'loss', 'content': 0.09419054538011551, 'timestamp': '2025-09-30 22:18:30.986457', 'step': 5305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:31.020698', 'step': 5305, 'epoch': 1} {'type': 'loss', 'content': 0.17641344666481018, 'timestamp': '2025-09-30 22:18:31.025982', 'step': 5306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.059413', 'step': 5306, 'epoch': 1} {'type': 'loss', 'content': 0.13160885870456696, 'timestamp': '2025-09-30 22:18:31.063671', 'step': 5307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:31.096202', 'step': 5307, 'epoch': 1} {'type': 'loss', 'content': 0.1575070172548294, 'timestamp': '2025-09-30 22:18:31.123046', 'step': 5308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.159182', 'step': 5308, 'epoch': 1} {'type': 'loss', 'content': 0.15653303265571594, 'timestamp': '2025-09-30 22:18:31.164332', 'step': 5309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:31.204615', 'step': 5309, 'epoch': 1} {'type': 'loss', 'content': 0.14990836381912231, 'timestamp': '2025-09-30 22:18:31.212305', 'step': 5310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.254816', 'step': 5310, 'epoch': 1} {'type': 'loss', 'content': 0.09868409484624863, 'timestamp': '2025-09-30 22:18:31.259237', 'step': 5311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:31.291966', 'step': 5311, 'epoch': 1} {'type': 'loss', 'content': 0.12191379815340042, 'timestamp': '2025-09-30 22:18:31.318483', 'step': 5312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:31.350530', 'step': 5312, 'epoch': 1} {'type': 'loss', 'content': 0.20039260387420654, 'timestamp': '2025-09-30 22:18:31.355230', 'step': 5313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.387933', 'step': 5313, 'epoch': 1} {'type': 'loss', 'content': 0.2833412289619446, 'timestamp': '2025-09-30 22:18:31.395592', 'step': 5314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:31.433909', 'step': 5314, 'epoch': 1} {'type': 'loss', 'content': 0.11840391904115677, 'timestamp': '2025-09-30 22:18:31.436850', 'step': 5315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:31.478088', 'step': 5315, 'epoch': 1} {'type': 'loss', 'content': 0.1941451132297516, 'timestamp': '2025-09-30 22:18:31.508347', 'step': 5316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:31.547090', 'step': 5316, 'epoch': 1} {'type': 'loss', 'content': 0.10013919323682785, 'timestamp': '2025-09-30 22:18:31.557748', 'step': 5317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.594838', 'step': 5317, 'epoch': 1} {'type': 'loss', 'content': 0.11691863089799881, 'timestamp': '2025-09-30 22:18:31.604463', 'step': 5318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.636787', 'step': 5318, 'epoch': 1} {'type': 'loss', 'content': 0.14787937700748444, 'timestamp': '2025-09-30 22:18:31.640839', 'step': 5319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.675697', 'step': 5319, 'epoch': 1} {'type': 'loss', 'content': 0.1647375077009201, 'timestamp': '2025-09-30 22:18:31.701216', 'step': 5320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:31.733588', 'step': 5320, 'epoch': 1} {'type': 'loss', 'content': 0.12567023932933807, 'timestamp': '2025-09-30 22:18:31.737887', 'step': 5321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:31.771965', 'step': 5321, 'epoch': 1} {'type': 'loss', 'content': 0.1066654771566391, 'timestamp': '2025-09-30 22:18:31.777714', 'step': 5322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:31.813769', 'step': 5322, 'epoch': 1} {'type': 'loss', 'content': 0.16935260593891144, 'timestamp': '2025-09-30 22:18:31.818434', 'step': 5323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:31.851697', 'step': 5323, 'epoch': 1} {'type': 'loss', 'content': 0.2608994245529175, 'timestamp': '2025-09-30 22:18:31.877754', 'step': 5324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:31.912345', 'step': 5324, 'epoch': 1} {'type': 'loss', 'content': 0.19144350290298462, 'timestamp': '2025-09-30 22:18:31.918482', 'step': 5325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.956608', 'step': 5325, 'epoch': 1} {'type': 'loss', 'content': 0.1189148873090744, 'timestamp': '2025-09-30 22:18:31.959729', 'step': 5326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:31.990064', 'step': 5326, 'epoch': 1} {'type': 'loss', 'content': 0.1298285573720932, 'timestamp': '2025-09-30 22:18:31.996138', 'step': 5327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.030872', 'step': 5327, 'epoch': 1} {'type': 'loss', 'content': 0.12126196920871735, 'timestamp': '2025-09-30 22:18:32.054534', 'step': 5328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:32.086999', 'step': 5328, 'epoch': 1} {'type': 'loss', 'content': 0.20891571044921875, 'timestamp': '2025-09-30 22:18:32.090523', 'step': 5329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:18:32.121878', 'step': 5329, 'epoch': 1} {'type': 'loss', 'content': 0.15096747875213623, 'timestamp': '2025-09-30 22:18:32.126579', 'step': 5330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.158707', 'step': 5330, 'epoch': 1} {'type': 'loss', 'content': 0.16815268993377686, 'timestamp': '2025-09-30 22:18:32.161839', 'step': 5331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:32.193347', 'step': 5331, 'epoch': 1} {'type': 'loss', 'content': 0.18046995997428894, 'timestamp': '2025-09-30 22:18:32.218033', 'step': 5332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.248833', 'step': 5332, 'epoch': 1} {'type': 'loss', 'content': 0.1779155433177948, 'timestamp': '2025-09-30 22:18:32.253479', 'step': 5333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.284343', 'step': 5333, 'epoch': 1} {'type': 'loss', 'content': 0.07449941337108612, 'timestamp': '2025-09-30 22:18:32.290677', 'step': 5334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.324275', 'step': 5334, 'epoch': 1} {'type': 'loss', 'content': 0.2128799706697464, 'timestamp': '2025-09-30 22:18:32.326909', 'step': 5335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:32.357999', 'step': 5335, 'epoch': 1} {'type': 'loss', 'content': 0.17007412016391754, 'timestamp': '2025-09-30 22:18:32.382092', 'step': 5336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.414307', 'step': 5336, 'epoch': 1} {'type': 'loss', 'content': 0.1586809903383255, 'timestamp': '2025-09-30 22:18:32.417612', 'step': 5337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.448986', 'step': 5337, 'epoch': 1} {'type': 'loss', 'content': 0.22602707147598267, 'timestamp': '2025-09-30 22:18:32.452482', 'step': 5338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:32.482857', 'step': 5338, 'epoch': 1} {'type': 'loss', 'content': 0.15428227186203003, 'timestamp': '2025-09-30 22:18:32.486517', 'step': 5339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.518806', 'step': 5339, 'epoch': 1} {'type': 'loss', 'content': 0.15210303664207458, 'timestamp': '2025-09-30 22:18:32.542610', 'step': 5340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.574136', 'step': 5340, 'epoch': 1} {'type': 'loss', 'content': 0.07437576353549957, 'timestamp': '2025-09-30 22:18:32.577373', 'step': 5341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.609205', 'step': 5341, 'epoch': 1} {'type': 'loss', 'content': 0.053953565657138824, 'timestamp': '2025-09-30 22:18:32.613109', 'step': 5342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.645362', 'step': 5342, 'epoch': 1} {'type': 'loss', 'content': 0.16841521859169006, 'timestamp': '2025-09-30 22:18:32.650965', 'step': 5343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.683869', 'step': 5343, 'epoch': 1} {'type': 'loss', 'content': 0.1647575944662094, 'timestamp': '2025-09-30 22:18:32.707724', 'step': 5344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:32.739012', 'step': 5344, 'epoch': 1} {'type': 'loss', 'content': 0.19483527541160583, 'timestamp': '2025-09-30 22:18:32.742826', 'step': 5345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:32.775348', 'step': 5345, 'epoch': 1} {'type': 'loss', 'content': 0.2153262346982956, 'timestamp': '2025-09-30 22:18:32.777704', 'step': 5346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:32.813650', 'step': 5346, 'epoch': 1} {'type': 'loss', 'content': 0.18759708106517792, 'timestamp': '2025-09-30 22:18:32.817503', 'step': 5347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:32.850087', 'step': 5347, 'epoch': 1} {'type': 'loss', 'content': 0.22678717970848083, 'timestamp': '2025-09-30 22:18:32.874722', 'step': 5348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.906633', 'step': 5348, 'epoch': 1} {'type': 'loss', 'content': 0.15195459127426147, 'timestamp': '2025-09-30 22:18:32.910150', 'step': 5349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:32.941063', 'step': 5349, 'epoch': 1} {'type': 'loss', 'content': 0.2072143256664276, 'timestamp': '2025-09-30 22:18:32.943551', 'step': 5350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:32.975376', 'step': 5350, 'epoch': 1} {'type': 'loss', 'content': 0.20902717113494873, 'timestamp': '2025-09-30 22:18:32.981877', 'step': 5351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.018040', 'step': 5351, 'epoch': 1} {'type': 'loss', 'content': 0.21290652453899384, 'timestamp': '2025-09-30 22:18:33.044099', 'step': 5352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.075363', 'step': 5352, 'epoch': 1} {'type': 'loss', 'content': 0.11406650394201279, 'timestamp': '2025-09-30 22:18:33.079573', 'step': 5353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:33.113156', 'step': 5353, 'epoch': 1} {'type': 'loss', 'content': 0.11164025217294693, 'timestamp': '2025-09-30 22:18:33.117180', 'step': 5354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:33.153752', 'step': 5354, 'epoch': 1} {'type': 'loss', 'content': 0.22035355865955353, 'timestamp': '2025-09-30 22:18:33.156582', 'step': 5355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:33.188524', 'step': 5355, 'epoch': 1} {'type': 'loss', 'content': 0.16795888543128967, 'timestamp': '2025-09-30 22:18:33.215078', 'step': 5356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.245271', 'step': 5356, 'epoch': 1} {'type': 'loss', 'content': 0.14002062380313873, 'timestamp': '2025-09-30 22:18:33.249934', 'step': 5357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:33.282583', 'step': 5357, 'epoch': 1} {'type': 'loss', 'content': 0.18921764194965363, 'timestamp': '2025-09-30 22:18:33.286343', 'step': 5358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.323855', 'step': 5358, 'epoch': 1} {'type': 'loss', 'content': 0.11581242829561234, 'timestamp': '2025-09-30 22:18:33.326883', 'step': 5359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:33.358936', 'step': 5359, 'epoch': 1} {'type': 'loss', 'content': 0.1578453779220581, 'timestamp': '2025-09-30 22:18:33.383824', 'step': 5360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.422335', 'step': 5360, 'epoch': 1} {'type': 'loss', 'content': 0.13294683396816254, 'timestamp': '2025-09-30 22:18:33.433653', 'step': 5361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.465966', 'step': 5361, 'epoch': 1} {'type': 'loss', 'content': 0.1321534365415573, 'timestamp': '2025-09-30 22:18:33.470648', 'step': 5362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.503809', 'step': 5362, 'epoch': 1} {'type': 'loss', 'content': 0.15103018283843994, 'timestamp': '2025-09-30 22:18:33.508245', 'step': 5363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.539453', 'step': 5363, 'epoch': 1} {'type': 'loss', 'content': 0.11486571282148361, 'timestamp': '2025-09-30 22:18:33.566184', 'step': 5364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:33.599156', 'step': 5364, 'epoch': 1} {'type': 'loss', 'content': 0.10977815836668015, 'timestamp': '2025-09-30 22:18:33.603419', 'step': 5365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:33.635656', 'step': 5365, 'epoch': 1} {'type': 'loss', 'content': 0.16538327932357788, 'timestamp': '2025-09-30 22:18:33.640608', 'step': 5366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.673111', 'step': 5366, 'epoch': 1} {'type': 'loss', 'content': 0.09022540599107742, 'timestamp': '2025-09-30 22:18:33.679095', 'step': 5367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.713063', 'step': 5367, 'epoch': 1} {'type': 'loss', 'content': 0.10001077502965927, 'timestamp': '2025-09-30 22:18:33.739429', 'step': 5368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:33.770314', 'step': 5368, 'epoch': 1} {'type': 'loss', 'content': 0.20600606501102448, 'timestamp': '2025-09-30 22:18:33.774867', 'step': 5369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:33.811956', 'step': 5369, 'epoch': 1} {'type': 'loss', 'content': 0.11171303689479828, 'timestamp': '2025-09-30 22:18:33.814222', 'step': 5370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:33.851724', 'step': 5370, 'epoch': 1} {'type': 'loss', 'content': 0.1503942906856537, 'timestamp': '2025-09-30 22:18:33.856407', 'step': 5371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.890183', 'step': 5371, 'epoch': 1} {'type': 'loss', 'content': 0.16693022847175598, 'timestamp': '2025-09-30 22:18:33.917858', 'step': 5372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:33.948219', 'step': 5372, 'epoch': 1} {'type': 'loss', 'content': 0.13199864327907562, 'timestamp': '2025-09-30 22:18:33.953894', 'step': 5373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:33.987032', 'step': 5373, 'epoch': 1} {'type': 'loss', 'content': 0.12394823879003525, 'timestamp': '2025-09-30 22:18:33.995782', 'step': 5374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:34.028097', 'step': 5374, 'epoch': 1} {'type': 'loss', 'content': 0.08330292254686356, 'timestamp': '2025-09-30 22:18:34.030634', 'step': 5375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:34.061015', 'step': 5375, 'epoch': 1} {'type': 'loss', 'content': 0.11254869401454926, 'timestamp': '2025-09-30 22:18:34.089686', 'step': 5376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:34.121454', 'step': 5376, 'epoch': 1} {'type': 'loss', 'content': 0.12259658426046371, 'timestamp': '2025-09-30 22:18:34.131482', 'step': 5377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.170921', 'step': 5377, 'epoch': 1} {'type': 'loss', 'content': 0.12588734924793243, 'timestamp': '2025-09-30 22:18:34.173257', 'step': 5378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.205976', 'step': 5378, 'epoch': 1} {'type': 'loss', 'content': 0.19760921597480774, 'timestamp': '2025-09-30 22:18:34.209260', 'step': 5379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.242461', 'step': 5379, 'epoch': 1} {'type': 'loss', 'content': 0.17922116816043854, 'timestamp': '2025-09-30 22:18:34.272661', 'step': 5380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:34.308268', 'step': 5380, 'epoch': 1} {'type': 'loss', 'content': 0.1329360455274582, 'timestamp': '2025-09-30 22:18:34.316800', 'step': 5381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:34.349026', 'step': 5381, 'epoch': 1} {'type': 'loss', 'content': 0.13531070947647095, 'timestamp': '2025-09-30 22:18:34.359955', 'step': 5382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.392924', 'step': 5382, 'epoch': 1} {'type': 'loss', 'content': 0.0925944447517395, 'timestamp': '2025-09-30 22:18:34.395418', 'step': 5383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:34.427196', 'step': 5383, 'epoch': 1} {'type': 'loss', 'content': 0.13948635756969452, 'timestamp': '2025-09-30 22:18:34.454191', 'step': 5384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:34.492489', 'step': 5384, 'epoch': 1} {'type': 'loss', 'content': 0.12063593417406082, 'timestamp': '2025-09-30 22:18:34.497000', 'step': 5385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:34.536929', 'step': 5385, 'epoch': 1} {'type': 'loss', 'content': 0.09554274380207062, 'timestamp': '2025-09-30 22:18:34.540925', 'step': 5386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.573850', 'step': 5386, 'epoch': 1} {'type': 'loss', 'content': 0.1725182682275772, 'timestamp': '2025-09-30 22:18:34.581453', 'step': 5387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:34.618801', 'step': 5387, 'epoch': 1} {'type': 'loss', 'content': 0.15125668048858643, 'timestamp': '2025-09-30 22:18:34.643737', 'step': 5388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:34.681546', 'step': 5388, 'epoch': 1} {'type': 'loss', 'content': 0.11657581478357315, 'timestamp': '2025-09-30 22:18:34.684710', 'step': 5389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.730592', 'step': 5389, 'epoch': 1} {'type': 'loss', 'content': 0.19521568715572357, 'timestamp': '2025-09-30 22:18:34.736141', 'step': 5390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:34.767799', 'step': 5390, 'epoch': 1} {'type': 'loss', 'content': 0.17749378085136414, 'timestamp': '2025-09-30 22:18:34.773620', 'step': 5391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.809476', 'step': 5391, 'epoch': 1} {'type': 'loss', 'content': 0.22882027924060822, 'timestamp': '2025-09-30 22:18:34.835200', 'step': 5392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:34.867723', 'step': 5392, 'epoch': 1} {'type': 'loss', 'content': 0.07992999255657196, 'timestamp': '2025-09-30 22:18:34.874764', 'step': 5393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:34.919719', 'step': 5393, 'epoch': 1} {'type': 'loss', 'content': 0.112175352871418, 'timestamp': '2025-09-30 22:18:34.927133', 'step': 5394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:34.961400', 'step': 5394, 'epoch': 1} {'type': 'loss', 'content': 0.2048279047012329, 'timestamp': '2025-09-30 22:18:34.966504', 'step': 5395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:35.004817', 'step': 5395, 'epoch': 1} {'type': 'loss', 'content': 0.1875072419643402, 'timestamp': '2025-09-30 22:18:35.031259', 'step': 5396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.064327', 'step': 5396, 'epoch': 1} {'type': 'loss', 'content': 0.12462719529867172, 'timestamp': '2025-09-30 22:18:35.072701', 'step': 5397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.115464', 'step': 5397, 'epoch': 1} {'type': 'loss', 'content': 0.1946282833814621, 'timestamp': '2025-09-30 22:18:35.127581', 'step': 5398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:35.195706', 'step': 5398, 'epoch': 1} {'type': 'loss', 'content': 0.20712672173976898, 'timestamp': '2025-09-30 22:18:35.199651', 'step': 5399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:35.242201', 'step': 5399, 'epoch': 1} {'type': 'loss', 'content': 0.18171702325344086, 'timestamp': '2025-09-30 22:18:35.277152', 'step': 5400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.308796', 'step': 5400, 'epoch': 1} {'type': 'loss', 'content': 0.12195838242769241, 'timestamp': '2025-09-30 22:18:35.319243', 'step': 5401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:35.356789', 'step': 5401, 'epoch': 1} {'type': 'loss', 'content': 0.13967059552669525, 'timestamp': '2025-09-30 22:18:35.361712', 'step': 5402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:35.393538', 'step': 5402, 'epoch': 1} {'type': 'loss', 'content': 0.1660296767950058, 'timestamp': '2025-09-30 22:18:35.419656', 'step': 5403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:35.457950', 'step': 5403, 'epoch': 1} {'type': 'loss', 'content': 0.10549522191286087, 'timestamp': '2025-09-30 22:18:35.483051', 'step': 5404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.518405', 'step': 5404, 'epoch': 1} {'type': 'loss', 'content': 0.11715894192457199, 'timestamp': '2025-09-30 22:18:35.533244', 'step': 5405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.571945', 'step': 5405, 'epoch': 1} {'type': 'loss', 'content': 0.20351457595825195, 'timestamp': '2025-09-30 22:18:35.574676', 'step': 5406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.615327', 'step': 5406, 'epoch': 1} {'type': 'loss', 'content': 0.15627628564834595, 'timestamp': '2025-09-30 22:18:35.619332', 'step': 5407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:35.652311', 'step': 5407, 'epoch': 1} {'type': 'loss', 'content': 0.12950493395328522, 'timestamp': '2025-09-30 22:18:35.678057', 'step': 5408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:35.713417', 'step': 5408, 'epoch': 1} {'type': 'loss', 'content': 0.06481093913316727, 'timestamp': '2025-09-30 22:18:35.728862', 'step': 5409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:35.765514', 'step': 5409, 'epoch': 1} {'type': 'loss', 'content': 0.120325468480587, 'timestamp': '2025-09-30 22:18:35.771721', 'step': 5410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.802261', 'step': 5410, 'epoch': 1} {'type': 'loss', 'content': 0.15362738072872162, 'timestamp': '2025-09-30 22:18:35.806965', 'step': 5411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:35.852061', 'step': 5411, 'epoch': 1} {'type': 'loss', 'content': 0.2147265374660492, 'timestamp': '2025-09-30 22:18:35.877031', 'step': 5412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.913161', 'step': 5412, 'epoch': 1} {'type': 'loss', 'content': 0.12008653581142426, 'timestamp': '2025-09-30 22:18:35.918162', 'step': 5413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:35.951478', 'step': 5413, 'epoch': 1} {'type': 'loss', 'content': 0.0950072780251503, 'timestamp': '2025-09-30 22:18:35.955288', 'step': 5414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:35.989596', 'step': 5414, 'epoch': 1} {'type': 'loss', 'content': 0.14691834151744843, 'timestamp': '2025-09-30 22:18:35.991850', 'step': 5415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.028804', 'step': 5415, 'epoch': 1} {'type': 'loss', 'content': 0.24111150205135345, 'timestamp': '2025-09-30 22:18:36.057415', 'step': 5416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:36.088117', 'step': 5416, 'epoch': 1} {'type': 'loss', 'content': 0.15590815246105194, 'timestamp': '2025-09-30 22:18:36.093529', 'step': 5417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:36.123983', 'step': 5417, 'epoch': 1} {'type': 'loss', 'content': 0.20079264044761658, 'timestamp': '2025-09-30 22:18:36.126709', 'step': 5418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.157406', 'step': 5418, 'epoch': 1} {'type': 'loss', 'content': 0.10414744913578033, 'timestamp': '2025-09-30 22:18:36.160637', 'step': 5419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:36.194082', 'step': 5419, 'epoch': 1} {'type': 'loss', 'content': 0.12436585128307343, 'timestamp': '2025-09-30 22:18:36.221658', 'step': 5420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.252210', 'step': 5420, 'epoch': 1} {'type': 'loss', 'content': 0.12702946364879608, 'timestamp': '2025-09-30 22:18:36.257068', 'step': 5421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.289727', 'step': 5421, 'epoch': 1} {'type': 'loss', 'content': 0.2228093296289444, 'timestamp': '2025-09-30 22:18:36.293681', 'step': 5422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.324509', 'step': 5422, 'epoch': 1} {'type': 'loss', 'content': 0.11525300145149231, 'timestamp': '2025-09-30 22:18:36.329447', 'step': 5423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:36.363380', 'step': 5423, 'epoch': 1} {'type': 'loss', 'content': 0.1809292584657669, 'timestamp': '2025-09-30 22:18:36.391468', 'step': 5424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:36.422169', 'step': 5424, 'epoch': 1} {'type': 'loss', 'content': 0.14727865159511566, 'timestamp': '2025-09-30 22:18:36.425879', 'step': 5425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.458458', 'step': 5425, 'epoch': 1} {'type': 'loss', 'content': 0.10420791059732437, 'timestamp': '2025-09-30 22:18:36.463725', 'step': 5426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:36.496476', 'step': 5426, 'epoch': 1} {'type': 'loss', 'content': 0.08820587396621704, 'timestamp': '2025-09-30 22:18:36.500690', 'step': 5427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:36.532958', 'step': 5427, 'epoch': 1} {'type': 'loss', 'content': 0.22401602566242218, 'timestamp': '2025-09-30 22:18:36.559142', 'step': 5428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:36.592950', 'step': 5428, 'epoch': 1} {'type': 'loss', 'content': 0.09063388407230377, 'timestamp': '2025-09-30 22:18:36.599234', 'step': 5429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.629984', 'step': 5429, 'epoch': 1} {'type': 'loss', 'content': 0.09359660744667053, 'timestamp': '2025-09-30 22:18:36.636690', 'step': 5430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:36.675620', 'step': 5430, 'epoch': 1} {'type': 'loss', 'content': 0.13247400522232056, 'timestamp': '2025-09-30 22:18:36.680527', 'step': 5431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:36.716167', 'step': 5431, 'epoch': 1} {'type': 'loss', 'content': 0.10031018406152725, 'timestamp': '2025-09-30 22:18:36.740098', 'step': 5432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.775195', 'step': 5432, 'epoch': 1} {'type': 'loss', 'content': 0.13545386493206024, 'timestamp': '2025-09-30 22:18:36.780718', 'step': 5433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:36.814678', 'step': 5433, 'epoch': 1} {'type': 'loss', 'content': 0.19583731889724731, 'timestamp': '2025-09-30 22:18:36.821319', 'step': 5434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:36.858787', 'step': 5434, 'epoch': 1} {'type': 'loss', 'content': 0.28619328141212463, 'timestamp': '2025-09-30 22:18:36.862118', 'step': 5435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:36.892681', 'step': 5435, 'epoch': 1} {'type': 'loss', 'content': 0.12656810879707336, 'timestamp': '2025-09-30 22:18:36.917287', 'step': 5436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:36.948329', 'step': 5436, 'epoch': 1} {'type': 'loss', 'content': 0.15470488369464874, 'timestamp': '2025-09-30 22:18:36.953758', 'step': 5437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:36.992267', 'step': 5437, 'epoch': 1} {'type': 'loss', 'content': 0.1708756536245346, 'timestamp': '2025-09-30 22:18:36.998228', 'step': 5438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:37.035719', 'step': 5438, 'epoch': 1} {'type': 'loss', 'content': 0.13845473527908325, 'timestamp': '2025-09-30 22:18:37.038620', 'step': 5439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:37.086014', 'step': 5439, 'epoch': 1} {'type': 'loss', 'content': 0.1588640809059143, 'timestamp': '2025-09-30 22:18:37.111145', 'step': 5440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.144376', 'step': 5440, 'epoch': 1} {'type': 'loss', 'content': 0.1997639685869217, 'timestamp': '2025-09-30 22:18:37.148379', 'step': 5441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.181119', 'step': 5441, 'epoch': 1} {'type': 'loss', 'content': 0.1494986116886139, 'timestamp': '2025-09-30 22:18:37.186158', 'step': 5442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:37.216543', 'step': 5442, 'epoch': 1} {'type': 'loss', 'content': 0.1528017818927765, 'timestamp': '2025-09-30 22:18:37.222040', 'step': 5443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:37.255136', 'step': 5443, 'epoch': 1} {'type': 'loss', 'content': 0.11823748052120209, 'timestamp': '2025-09-30 22:18:37.281103', 'step': 5444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:37.313848', 'step': 5444, 'epoch': 1} {'type': 'loss', 'content': 0.11835236847400665, 'timestamp': '2025-09-30 22:18:37.318435', 'step': 5445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.351293', 'step': 5445, 'epoch': 1} {'type': 'loss', 'content': 0.14461885392665863, 'timestamp': '2025-09-30 22:18:37.354622', 'step': 5446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.388653', 'step': 5446, 'epoch': 1} {'type': 'loss', 'content': 0.14924807846546173, 'timestamp': '2025-09-30 22:18:37.394433', 'step': 5447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:37.426323', 'step': 5447, 'epoch': 1} {'type': 'loss', 'content': 0.12075238674879074, 'timestamp': '2025-09-30 22:18:37.450423', 'step': 5448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.482804', 'step': 5448, 'epoch': 1} {'type': 'loss', 'content': 0.18510504066944122, 'timestamp': '2025-09-30 22:18:37.486627', 'step': 5449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:37.517279', 'step': 5449, 'epoch': 1} {'type': 'loss', 'content': 0.12566012144088745, 'timestamp': '2025-09-30 22:18:37.519562', 'step': 5450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:37.551994', 'step': 5450, 'epoch': 1} {'type': 'loss', 'content': 0.2586619257926941, 'timestamp': '2025-09-30 22:18:37.556290', 'step': 5451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:37.586946', 'step': 5451, 'epoch': 1} {'type': 'loss', 'content': 0.11206936836242676, 'timestamp': '2025-09-30 22:18:37.612130', 'step': 5452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:37.642318', 'step': 5452, 'epoch': 1} {'type': 'loss', 'content': 0.16170209646224976, 'timestamp': '2025-09-30 22:18:37.646775', 'step': 5453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.679139', 'step': 5453, 'epoch': 1} {'type': 'loss', 'content': 0.23668637871742249, 'timestamp': '2025-09-30 22:18:37.684425', 'step': 5454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.717269', 'step': 5454, 'epoch': 1} {'type': 'loss', 'content': 0.12972934544086456, 'timestamp': '2025-09-30 22:18:37.719936', 'step': 5455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.751470', 'step': 5455, 'epoch': 1} {'type': 'loss', 'content': 0.09551326930522919, 'timestamp': '2025-09-30 22:18:37.777283', 'step': 5456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:37.807363', 'step': 5456, 'epoch': 1} {'type': 'loss', 'content': 0.10976701974868774, 'timestamp': '2025-09-30 22:18:37.810397', 'step': 5457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:37.856951', 'step': 5457, 'epoch': 1} {'type': 'loss', 'content': 0.25893229246139526, 'timestamp': '2025-09-30 22:18:37.859772', 'step': 5458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:37.893176', 'step': 5458, 'epoch': 1} {'type': 'loss', 'content': 0.13470232486724854, 'timestamp': '2025-09-30 22:18:37.897655', 'step': 5459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:37.930162', 'step': 5459, 'epoch': 1} {'type': 'loss', 'content': 0.19435380399227142, 'timestamp': '2025-09-30 22:18:37.956686', 'step': 5460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:37.988853', 'step': 5460, 'epoch': 1} {'type': 'loss', 'content': 0.1385154277086258, 'timestamp': '2025-09-30 22:18:37.994057', 'step': 5461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:38.027792', 'step': 5461, 'epoch': 1} {'type': 'loss', 'content': 0.11240709573030472, 'timestamp': '2025-09-30 22:18:38.030733', 'step': 5462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.064612', 'step': 5462, 'epoch': 1} {'type': 'loss', 'content': 0.13387827575206757, 'timestamp': '2025-09-30 22:18:38.068762', 'step': 5463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:38.102342', 'step': 5463, 'epoch': 1} {'type': 'loss', 'content': 0.0790974497795105, 'timestamp': '2025-09-30 22:18:38.126590', 'step': 5464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.165662', 'step': 5464, 'epoch': 1} {'type': 'loss', 'content': 0.0761062353849411, 'timestamp': '2025-09-30 22:18:38.170932', 'step': 5465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.201426', 'step': 5465, 'epoch': 1} {'type': 'loss', 'content': 0.1821412593126297, 'timestamp': '2025-09-30 22:18:38.205016', 'step': 5466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.238317', 'step': 5466, 'epoch': 1} {'type': 'loss', 'content': 0.14514318108558655, 'timestamp': '2025-09-30 22:18:38.243763', 'step': 5467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.277009', 'step': 5467, 'epoch': 1} {'type': 'loss', 'content': 0.21625736355781555, 'timestamp': '2025-09-30 22:18:38.303650', 'step': 5468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:38.335511', 'step': 5468, 'epoch': 1} {'type': 'loss', 'content': 0.10046207159757614, 'timestamp': '2025-09-30 22:18:38.339965', 'step': 5469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.372101', 'step': 5469, 'epoch': 1} {'type': 'loss', 'content': 0.22340622544288635, 'timestamp': '2025-09-30 22:18:38.374490', 'step': 5470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.407347', 'step': 5470, 'epoch': 1} {'type': 'loss', 'content': 0.13349415361881256, 'timestamp': '2025-09-30 22:18:38.409776', 'step': 5471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.440953', 'step': 5471, 'epoch': 1} {'type': 'loss', 'content': 0.13318270444869995, 'timestamp': '2025-09-30 22:18:38.465904', 'step': 5472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.498075', 'step': 5472, 'epoch': 1} {'type': 'loss', 'content': 0.1484176218509674, 'timestamp': '2025-09-30 22:18:38.500116', 'step': 5473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.534766', 'step': 5473, 'epoch': 1} {'type': 'loss', 'content': 0.17359158396720886, 'timestamp': '2025-09-30 22:18:38.546779', 'step': 5474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:38.580268', 'step': 5474, 'epoch': 1} {'type': 'loss', 'content': 0.09942306578159332, 'timestamp': '2025-09-30 22:18:38.583328', 'step': 5475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.614639', 'step': 5475, 'epoch': 1} {'type': 'loss', 'content': 0.11255539953708649, 'timestamp': '2025-09-30 22:18:38.640665', 'step': 5476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.671957', 'step': 5476, 'epoch': 1} {'type': 'loss', 'content': 0.22477643191814423, 'timestamp': '2025-09-30 22:18:38.676926', 'step': 5477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.709895', 'step': 5477, 'epoch': 1} {'type': 'loss', 'content': 0.23302961885929108, 'timestamp': '2025-09-30 22:18:38.716103', 'step': 5478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:38.745965', 'step': 5478, 'epoch': 1} {'type': 'loss', 'content': 0.16170045733451843, 'timestamp': '2025-09-30 22:18:38.751630', 'step': 5479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:38.786677', 'step': 5479, 'epoch': 1} {'type': 'loss', 'content': 0.11734883487224579, 'timestamp': '2025-09-30 22:18:38.814359', 'step': 5480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:38.848335', 'step': 5480, 'epoch': 1} {'type': 'loss', 'content': 0.21402575075626373, 'timestamp': '2025-09-30 22:18:38.851124', 'step': 5481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.881999', 'step': 5481, 'epoch': 1} {'type': 'loss', 'content': 0.11190997809171677, 'timestamp': '2025-09-30 22:18:38.888117', 'step': 5482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:38.919899', 'step': 5482, 'epoch': 1} {'type': 'loss', 'content': 0.1327095627784729, 'timestamp': '2025-09-30 22:18:38.925563', 'step': 5483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:38.960108', 'step': 5483, 'epoch': 1} {'type': 'loss', 'content': 0.1861320436000824, 'timestamp': '2025-09-30 22:18:38.987394', 'step': 5484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:39.019832', 'step': 5484, 'epoch': 1} {'type': 'loss', 'content': 0.1099097952246666, 'timestamp': '2025-09-30 22:18:39.023397', 'step': 5485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:39.054543', 'step': 5485, 'epoch': 1} {'type': 'loss', 'content': 0.21026349067687988, 'timestamp': '2025-09-30 22:18:39.060024', 'step': 5486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:39.091465', 'step': 5486, 'epoch': 1} {'type': 'loss', 'content': 0.13528499007225037, 'timestamp': '2025-09-30 22:18:39.105393', 'step': 5487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.151033', 'step': 5487, 'epoch': 1} {'type': 'loss', 'content': 0.14854571223258972, 'timestamp': '2025-09-30 22:18:39.183008', 'step': 5488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.221778', 'step': 5488, 'epoch': 1} {'type': 'loss', 'content': 0.0941755399107933, 'timestamp': '2025-09-30 22:18:39.225427', 'step': 5489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:39.256754', 'step': 5489, 'epoch': 1} {'type': 'loss', 'content': 0.16418848931789398, 'timestamp': '2025-09-30 22:18:39.262738', 'step': 5490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:39.296966', 'step': 5490, 'epoch': 1} {'type': 'loss', 'content': 0.07512757927179337, 'timestamp': '2025-09-30 22:18:39.299710', 'step': 5491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.338042', 'step': 5491, 'epoch': 1} {'type': 'loss', 'content': 0.09914494305849075, 'timestamp': '2025-09-30 22:18:39.364944', 'step': 5492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.401384', 'step': 5492, 'epoch': 1} {'type': 'loss', 'content': 0.17181329429149628, 'timestamp': '2025-09-30 22:18:39.404757', 'step': 5493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:39.438493', 'step': 5493, 'epoch': 1} {'type': 'loss', 'content': 0.12534673511981964, 'timestamp': '2025-09-30 22:18:39.441330', 'step': 5494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:39.471879', 'step': 5494, 'epoch': 1} {'type': 'loss', 'content': 0.15875329077243805, 'timestamp': '2025-09-30 22:18:39.478077', 'step': 5495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.512911', 'step': 5495, 'epoch': 1} {'type': 'loss', 'content': 0.2323889583349228, 'timestamp': '2025-09-30 22:18:39.538323', 'step': 5496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.572613', 'step': 5496, 'epoch': 1} {'type': 'loss', 'content': 0.1337144374847412, 'timestamp': '2025-09-30 22:18:39.578215', 'step': 5497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:39.608953', 'step': 5497, 'epoch': 1} {'type': 'loss', 'content': 0.10022266954183578, 'timestamp': '2025-09-30 22:18:39.615815', 'step': 5498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:39.651050', 'step': 5498, 'epoch': 1} {'type': 'loss', 'content': 0.18073853850364685, 'timestamp': '2025-09-30 22:18:39.656486', 'step': 5499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:39.690517', 'step': 5499, 'epoch': 1} {'type': 'loss', 'content': 0.18532727658748627, 'timestamp': '2025-09-30 22:18:39.718537', 'step': 5500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 5500', 'timestamp': '2025-09-30 22:18:44.970269', 'step': 5500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:45.010605', 'step': 5500, 'epoch': 1} {'type': 'loss', 'content': 0.1776922196149826, 'timestamp': '2025-09-30 22:18:45.014555', 'step': 5501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:45.046003', 'step': 5501, 'epoch': 1} {'type': 'loss', 'content': 0.2599848210811615, 'timestamp': '2025-09-30 22:18:45.048398', 'step': 5502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.079285', 'step': 5502, 'epoch': 1} {'type': 'loss', 'content': 0.02231806330382824, 'timestamp': '2025-09-30 22:18:45.083517', 'step': 5503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.116101', 'step': 5503, 'epoch': 1} {'type': 'loss', 'content': 0.11895207315683365, 'timestamp': '2025-09-30 22:18:45.140451', 'step': 5504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:45.173538', 'step': 5504, 'epoch': 1} {'type': 'loss', 'content': 0.11607996374368668, 'timestamp': '2025-09-30 22:18:45.178669', 'step': 5505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:45.211083', 'step': 5505, 'epoch': 1} {'type': 'loss', 'content': 0.19779609143733978, 'timestamp': '2025-09-30 22:18:45.216416', 'step': 5506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:45.249675', 'step': 5506, 'epoch': 1} {'type': 'loss', 'content': 0.2129870504140854, 'timestamp': '2025-09-30 22:18:45.254666', 'step': 5507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.287550', 'step': 5507, 'epoch': 1} {'type': 'loss', 'content': 0.1560114026069641, 'timestamp': '2025-09-30 22:18:45.313833', 'step': 5508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.344103', 'step': 5508, 'epoch': 1} {'type': 'loss', 'content': 0.1584642082452774, 'timestamp': '2025-09-30 22:18:45.346596', 'step': 5509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.377999', 'step': 5509, 'epoch': 1} {'type': 'loss', 'content': 0.17570722103118896, 'timestamp': '2025-09-30 22:18:45.382914', 'step': 5510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:45.416339', 'step': 5510, 'epoch': 1} {'type': 'loss', 'content': 0.16013343632221222, 'timestamp': '2025-09-30 22:18:45.423849', 'step': 5511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:45.455107', 'step': 5511, 'epoch': 1} {'type': 'loss', 'content': 0.11777844280004501, 'timestamp': '2025-09-30 22:18:45.483364', 'step': 5512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:45.513873', 'step': 5512, 'epoch': 1} {'type': 'loss', 'content': 0.20657072961330414, 'timestamp': '2025-09-30 22:18:45.522355', 'step': 5513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.557607', 'step': 5513, 'epoch': 1} {'type': 'loss', 'content': 0.13762763142585754, 'timestamp': '2025-09-30 22:18:45.560469', 'step': 5514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:45.594616', 'step': 5514, 'epoch': 1} {'type': 'loss', 'content': 0.14460010826587677, 'timestamp': '2025-09-30 22:18:45.602747', 'step': 5515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.638263', 'step': 5515, 'epoch': 1} {'type': 'loss', 'content': 0.077416330575943, 'timestamp': '2025-09-30 22:18:45.668257', 'step': 5516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:45.705174', 'step': 5516, 'epoch': 1} {'type': 'loss', 'content': 0.12651143968105316, 'timestamp': '2025-09-30 22:18:45.708488', 'step': 5517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:45.745294', 'step': 5517, 'epoch': 1} {'type': 'loss', 'content': 0.1845991313457489, 'timestamp': '2025-09-30 22:18:45.752504', 'step': 5518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:45.787180', 'step': 5518, 'epoch': 1} {'type': 'loss', 'content': 0.10439077019691467, 'timestamp': '2025-09-30 22:18:45.789789', 'step': 5519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:45.824590', 'step': 5519, 'epoch': 1} {'type': 'loss', 'content': 0.09585850685834885, 'timestamp': '2025-09-30 22:18:45.848624', 'step': 5520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:45.884349', 'step': 5520, 'epoch': 1} {'type': 'loss', 'content': 0.1650633066892624, 'timestamp': '2025-09-30 22:18:45.891736', 'step': 5521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:45.927373', 'step': 5521, 'epoch': 1} {'type': 'loss', 'content': 0.0942460298538208, 'timestamp': '2025-09-30 22:18:45.937834', 'step': 5522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:45.968803', 'step': 5522, 'epoch': 1} {'type': 'loss', 'content': 0.14174792170524597, 'timestamp': '2025-09-30 22:18:45.976742', 'step': 5523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.008301', 'step': 5523, 'epoch': 1} {'type': 'loss', 'content': 0.13231126964092255, 'timestamp': '2025-09-30 22:18:46.033449', 'step': 5524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.065499', 'step': 5524, 'epoch': 1} {'type': 'loss', 'content': 0.10619328916072845, 'timestamp': '2025-09-30 22:18:46.075898', 'step': 5525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:18:46.108312', 'step': 5525, 'epoch': 1} {'type': 'loss', 'content': 0.20769570767879486, 'timestamp': '2025-09-30 22:18:46.117136', 'step': 5526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:46.154334', 'step': 5526, 'epoch': 1} {'type': 'loss', 'content': 0.1469639092683792, 'timestamp': '2025-09-30 22:18:46.158479', 'step': 5527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:46.196469', 'step': 5527, 'epoch': 1} {'type': 'loss', 'content': 0.17474524676799774, 'timestamp': '2025-09-30 22:18:46.225605', 'step': 5528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:46.262326', 'step': 5528, 'epoch': 1} {'type': 'loss', 'content': 0.0796385407447815, 'timestamp': '2025-09-30 22:18:46.265082', 'step': 5529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.295118', 'step': 5529, 'epoch': 1} {'type': 'loss', 'content': 0.1486443728208542, 'timestamp': '2025-09-30 22:18:46.302960', 'step': 5530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:46.332881', 'step': 5530, 'epoch': 1} {'type': 'loss', 'content': 0.13854797184467316, 'timestamp': '2025-09-30 22:18:46.339095', 'step': 5531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:46.380111', 'step': 5531, 'epoch': 1} {'type': 'loss', 'content': 0.20261959731578827, 'timestamp': '2025-09-30 22:18:46.405295', 'step': 5532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.440102', 'step': 5532, 'epoch': 1} {'type': 'loss', 'content': 0.15390664339065552, 'timestamp': '2025-09-30 22:18:46.442813', 'step': 5533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.485326', 'step': 5533, 'epoch': 1} {'type': 'loss', 'content': 0.2525516748428345, 'timestamp': '2025-09-30 22:18:46.487959', 'step': 5534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:18:46.527324', 'step': 5534, 'epoch': 1} {'type': 'loss', 'content': 0.16445806622505188, 'timestamp': '2025-09-30 22:18:46.535593', 'step': 5535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:46.571590', 'step': 5535, 'epoch': 1} {'type': 'loss', 'content': 0.1506664901971817, 'timestamp': '2025-09-30 22:18:46.601464', 'step': 5536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:46.632983', 'step': 5536, 'epoch': 1} {'type': 'loss', 'content': 0.13780765235424042, 'timestamp': '2025-09-30 22:18:46.635536', 'step': 5537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.666103', 'step': 5537, 'epoch': 1} {'type': 'loss', 'content': 0.20326022803783417, 'timestamp': '2025-09-30 22:18:46.675666', 'step': 5538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:46.715317', 'step': 5538, 'epoch': 1} {'type': 'loss', 'content': 0.1876123994588852, 'timestamp': '2025-09-30 22:18:46.723594', 'step': 5539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:46.760837', 'step': 5539, 'epoch': 1} {'type': 'loss', 'content': 0.16321901977062225, 'timestamp': '2025-09-30 22:18:46.785285', 'step': 5540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:46.818741', 'step': 5540, 'epoch': 1} {'type': 'loss', 'content': 0.1313796490430832, 'timestamp': '2025-09-30 22:18:46.824573', 'step': 5541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:46.857836', 'step': 5541, 'epoch': 1} {'type': 'loss', 'content': 0.13706058263778687, 'timestamp': '2025-09-30 22:18:46.862418', 'step': 5542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:46.895514', 'step': 5542, 'epoch': 1} {'type': 'loss', 'content': 0.12079641968011856, 'timestamp': '2025-09-30 22:18:46.899696', 'step': 5543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:46.929839', 'step': 5543, 'epoch': 1} {'type': 'loss', 'content': 0.18783165514469147, 'timestamp': '2025-09-30 22:18:46.956346', 'step': 5544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:46.986869', 'step': 5544, 'epoch': 1} {'type': 'loss', 'content': 0.10077688097953796, 'timestamp': '2025-09-30 22:18:46.990245', 'step': 5545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.022378', 'step': 5545, 'epoch': 1} {'type': 'loss', 'content': 0.2095704823732376, 'timestamp': '2025-09-30 22:18:47.025384', 'step': 5546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.057865', 'step': 5546, 'epoch': 1} {'type': 'loss', 'content': 0.1336427479982376, 'timestamp': '2025-09-30 22:18:47.075166', 'step': 5547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:47.106525', 'step': 5547, 'epoch': 1} {'type': 'loss', 'content': 0.20104633271694183, 'timestamp': '2025-09-30 22:18:47.132117', 'step': 5548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:47.163363', 'step': 5548, 'epoch': 1} {'type': 'loss', 'content': 0.15398479998111725, 'timestamp': '2025-09-30 22:18:47.167400', 'step': 5549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.199565', 'step': 5549, 'epoch': 1} {'type': 'loss', 'content': 0.16443757712841034, 'timestamp': '2025-09-30 22:18:47.216981', 'step': 5550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.262834', 'step': 5550, 'epoch': 1} {'type': 'loss', 'content': 0.3020549416542053, 'timestamp': '2025-09-30 22:18:47.266698', 'step': 5551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:47.297914', 'step': 5551, 'epoch': 1} {'type': 'loss', 'content': 0.22316566109657288, 'timestamp': '2025-09-30 22:18:47.324599', 'step': 5552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.357780', 'step': 5552, 'epoch': 1} {'type': 'loss', 'content': 0.2547549903392792, 'timestamp': '2025-09-30 22:18:47.376164', 'step': 5553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:47.422391', 'step': 5553, 'epoch': 1} {'type': 'loss', 'content': 0.1621439903974533, 'timestamp': '2025-09-30 22:18:47.427979', 'step': 5554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:47.460137', 'step': 5554, 'epoch': 1} {'type': 'loss', 'content': 0.12124615907669067, 'timestamp': '2025-09-30 22:18:47.465278', 'step': 5555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:47.495868', 'step': 5555, 'epoch': 1} {'type': 'loss', 'content': 0.13123191893100739, 'timestamp': '2025-09-30 22:18:47.521687', 'step': 5556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.553080', 'step': 5556, 'epoch': 1} {'type': 'loss', 'content': 0.2161521464586258, 'timestamp': '2025-09-30 22:18:47.560477', 'step': 5557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:47.596532', 'step': 5557, 'epoch': 1} {'type': 'loss', 'content': 0.18170331418514252, 'timestamp': '2025-09-30 22:18:47.599655', 'step': 5558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:47.639533', 'step': 5558, 'epoch': 1} {'type': 'loss', 'content': 0.1295471340417862, 'timestamp': '2025-09-30 22:18:47.649911', 'step': 5559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.689068', 'step': 5559, 'epoch': 1} {'type': 'loss', 'content': 0.08122917264699936, 'timestamp': '2025-09-30 22:18:47.724843', 'step': 5560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:47.769326', 'step': 5560, 'epoch': 1} {'type': 'loss', 'content': 0.1013178899884224, 'timestamp': '2025-09-30 22:18:47.773896', 'step': 5561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:47.806029', 'step': 5561, 'epoch': 1} {'type': 'loss', 'content': 0.1222415417432785, 'timestamp': '2025-09-30 22:18:47.823360', 'step': 5562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:47.856452', 'step': 5562, 'epoch': 1} {'type': 'loss', 'content': 0.24030479788780212, 'timestamp': '2025-09-30 22:18:47.860808', 'step': 5563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:18:47.893666', 'step': 5563, 'epoch': 1} {'type': 'loss', 'content': 0.13659287989139557, 'timestamp': '2025-09-30 22:18:47.919528', 'step': 5564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:47.950453', 'step': 5564, 'epoch': 1} {'type': 'loss', 'content': 0.1598234921693802, 'timestamp': '2025-09-30 22:18:47.954248', 'step': 5565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:47.985567', 'step': 5565, 'epoch': 1} {'type': 'loss', 'content': 0.2616301476955414, 'timestamp': '2025-09-30 22:18:48.003781', 'step': 5566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.050277', 'step': 5566, 'epoch': 1} {'type': 'loss', 'content': 0.14669837057590485, 'timestamp': '2025-09-30 22:18:48.067905', 'step': 5567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:48.100326', 'step': 5567, 'epoch': 1} {'type': 'loss', 'content': 0.1645265370607376, 'timestamp': '2025-09-30 22:18:48.127785', 'step': 5568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.173402', 'step': 5568, 'epoch': 1} {'type': 'loss', 'content': 0.2393309623003006, 'timestamp': '2025-09-30 22:18:48.189870', 'step': 5569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:48.237635', 'step': 5569, 'epoch': 1} {'type': 'loss', 'content': 0.13892783224582672, 'timestamp': '2025-09-30 22:18:48.254334', 'step': 5570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:48.285378', 'step': 5570, 'epoch': 1} {'type': 'loss', 'content': 0.17965634167194366, 'timestamp': '2025-09-30 22:18:48.288761', 'step': 5571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:18:48.336430', 'step': 5571, 'epoch': 1} {'type': 'loss', 'content': 0.10286733508110046, 'timestamp': '2025-09-30 22:18:48.360856', 'step': 5572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.395897', 'step': 5572, 'epoch': 1} {'type': 'loss', 'content': 0.09181665629148483, 'timestamp': '2025-09-30 22:18:48.411306', 'step': 5573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:48.442998', 'step': 5573, 'epoch': 1} {'type': 'loss', 'content': 0.1531325876712799, 'timestamp': '2025-09-30 22:18:48.445301', 'step': 5574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:48.481712', 'step': 5574, 'epoch': 1} {'type': 'loss', 'content': 0.11682101339101791, 'timestamp': '2025-09-30 22:18:48.486020', 'step': 5575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:48.516333', 'step': 5575, 'epoch': 1} {'type': 'loss', 'content': 0.10181935876607895, 'timestamp': '2025-09-30 22:18:48.540941', 'step': 5576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:48.570617', 'step': 5576, 'epoch': 1} {'type': 'loss', 'content': 0.14243470132350922, 'timestamp': '2025-09-30 22:18:48.578814', 'step': 5577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:48.610951', 'step': 5577, 'epoch': 1} {'type': 'loss', 'content': 0.12964525818824768, 'timestamp': '2025-09-30 22:18:48.619343', 'step': 5578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:48.658146', 'step': 5578, 'epoch': 1} {'type': 'loss', 'content': 0.1246924102306366, 'timestamp': '2025-09-30 22:18:48.660088', 'step': 5579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:48.693517', 'step': 5579, 'epoch': 1} {'type': 'loss', 'content': 0.16959446668624878, 'timestamp': '2025-09-30 22:18:48.717823', 'step': 5580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.753428', 'step': 5580, 'epoch': 1} {'type': 'loss', 'content': 0.09728846698999405, 'timestamp': '2025-09-30 22:18:48.759677', 'step': 5581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:48.790575', 'step': 5581, 'epoch': 1} {'type': 'loss', 'content': 0.1269998401403427, 'timestamp': '2025-09-30 22:18:48.793838', 'step': 5582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.824229', 'step': 5582, 'epoch': 1} {'type': 'loss', 'content': 0.1273571252822876, 'timestamp': '2025-09-30 22:18:48.826869', 'step': 5583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.859545', 'step': 5583, 'epoch': 1} {'type': 'loss', 'content': 0.17256255447864532, 'timestamp': '2025-09-30 22:18:48.886525', 'step': 5584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:48.916999', 'step': 5584, 'epoch': 1} {'type': 'loss', 'content': 0.27818313241004944, 'timestamp': '2025-09-30 22:18:48.922311', 'step': 5585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:48.956278', 'step': 5585, 'epoch': 1} {'type': 'loss', 'content': 0.07123945653438568, 'timestamp': '2025-09-30 22:18:48.958776', 'step': 5586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:48.988920', 'step': 5586, 'epoch': 1} {'type': 'loss', 'content': 0.0923948884010315, 'timestamp': '2025-09-30 22:18:48.995237', 'step': 5587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:49.031924', 'step': 5587, 'epoch': 1} {'type': 'loss', 'content': 0.14395850896835327, 'timestamp': '2025-09-30 22:18:49.061783', 'step': 5588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:49.092336', 'step': 5588, 'epoch': 1} {'type': 'loss', 'content': 0.1329844892024994, 'timestamp': '2025-09-30 22:18:49.094664', 'step': 5589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:49.124814', 'step': 5589, 'epoch': 1} {'type': 'loss', 'content': 0.10023807734251022, 'timestamp': '2025-09-30 22:18:49.131851', 'step': 5590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:49.167184', 'step': 5590, 'epoch': 1} {'type': 'loss', 'content': 0.21129046380519867, 'timestamp': '2025-09-30 22:18:49.170337', 'step': 5591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:49.204372', 'step': 5591, 'epoch': 1} {'type': 'loss', 'content': 0.0781109556555748, 'timestamp': '2025-09-30 22:18:49.228566', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:18:56.808215', 'step': 5592, 'epoch': 1} {'type': 'pplx', 'content': 8016.03749218191, 'timestamp': '2025-09-30 22:18:56.811117', 'step': 5592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:56.842554', 'step': 5592, 'epoch': 1} {'type': 'loss', 'content': 0.13805387914180756, 'timestamp': '2025-09-30 22:18:56.848567', 'step': 5593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:56.880387', 'step': 5593, 'epoch': 1} {'type': 'loss', 'content': 0.12498575448989868, 'timestamp': '2025-09-30 22:18:56.884686', 'step': 5594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:56.916757', 'step': 5594, 'epoch': 1} {'type': 'loss', 'content': 0.18889470398426056, 'timestamp': '2025-09-30 22:18:56.919467', 'step': 5595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:56.950198', 'step': 5595, 'epoch': 1} {'type': 'loss', 'content': 0.16121037304401398, 'timestamp': '2025-09-30 22:18:56.979702', 'step': 5596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.012208', 'step': 5596, 'epoch': 1} {'type': 'loss', 'content': 0.3026898205280304, 'timestamp': '2025-09-30 22:18:57.015645', 'step': 5597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.052487', 'step': 5597, 'epoch': 1} {'type': 'loss', 'content': 0.11735286563634872, 'timestamp': '2025-09-30 22:18:57.055871', 'step': 5598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:57.085388', 'step': 5598, 'epoch': 1} {'type': 'loss', 'content': 0.14188803732395172, 'timestamp': '2025-09-30 22:18:57.088851', 'step': 5599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:57.119108', 'step': 5599, 'epoch': 1} {'type': 'loss', 'content': 0.1364971250295639, 'timestamp': '2025-09-30 22:18:57.151528', 'step': 5600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.182089', 'step': 5600, 'epoch': 1} {'type': 'loss', 'content': 0.17691780626773834, 'timestamp': '2025-09-30 22:18:57.196129', 'step': 5601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:57.227690', 'step': 5601, 'epoch': 1} {'type': 'loss', 'content': 0.161603182554245, 'timestamp': '2025-09-30 22:18:57.231844', 'step': 5602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.299312', 'step': 5602, 'epoch': 1} {'type': 'loss', 'content': 0.20199404656887054, 'timestamp': '2025-09-30 22:18:57.302861', 'step': 5603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.342830', 'step': 5603, 'epoch': 1} {'type': 'loss', 'content': 0.14151985943317413, 'timestamp': '2025-09-30 22:18:57.368255', 'step': 5604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.413161', 'step': 5604, 'epoch': 1} {'type': 'loss', 'content': 0.1467050462961197, 'timestamp': '2025-09-30 22:18:57.417083', 'step': 5605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:57.463004', 'step': 5605, 'epoch': 1} {'type': 'loss', 'content': 0.06660439819097519, 'timestamp': '2025-09-30 22:18:57.467222', 'step': 5606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:57.500336', 'step': 5606, 'epoch': 1} {'type': 'loss', 'content': 0.1548338234424591, 'timestamp': '2025-09-30 22:18:57.503186', 'step': 5607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:57.552835', 'step': 5607, 'epoch': 1} {'type': 'loss', 'content': 0.22001320123672485, 'timestamp': '2025-09-30 22:18:57.578076', 'step': 5608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.616489', 'step': 5608, 'epoch': 1} {'type': 'loss', 'content': 0.16190867125988007, 'timestamp': '2025-09-30 22:18:57.620356', 'step': 5609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:57.663653', 'step': 5609, 'epoch': 1} {'type': 'loss', 'content': 0.11316855251789093, 'timestamp': '2025-09-30 22:18:57.666188', 'step': 5610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.702023', 'step': 5610, 'epoch': 1} {'type': 'loss', 'content': 0.07973988354206085, 'timestamp': '2025-09-30 22:18:57.705682', 'step': 5611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:57.741530', 'step': 5611, 'epoch': 1} {'type': 'loss', 'content': 0.12149931490421295, 'timestamp': '2025-09-30 22:18:57.767090', 'step': 5612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:57.807344', 'step': 5612, 'epoch': 1} {'type': 'loss', 'content': 0.15618328750133514, 'timestamp': '2025-09-30 22:18:57.810705', 'step': 5613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.848376', 'step': 5613, 'epoch': 1} {'type': 'loss', 'content': 0.17124029994010925, 'timestamp': '2025-09-30 22:18:57.852468', 'step': 5614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.885278', 'step': 5614, 'epoch': 1} {'type': 'loss', 'content': 0.13569194078445435, 'timestamp': '2025-09-30 22:18:57.887681', 'step': 5615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:57.926881', 'step': 5615, 'epoch': 1} {'type': 'loss', 'content': 0.11591745913028717, 'timestamp': '2025-09-30 22:18:57.958598', 'step': 5616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:57.996781', 'step': 5616, 'epoch': 1} {'type': 'loss', 'content': 0.12466903030872345, 'timestamp': '2025-09-30 22:18:57.999226', 'step': 5617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.048716', 'step': 5617, 'epoch': 1} {'type': 'loss', 'content': 0.1930474489927292, 'timestamp': '2025-09-30 22:18:58.051814', 'step': 5618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:58.103442', 'step': 5618, 'epoch': 1} {'type': 'loss', 'content': 0.11574120074510574, 'timestamp': '2025-09-30 22:18:58.106119', 'step': 5619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.140856', 'step': 5619, 'epoch': 1} {'type': 'loss', 'content': 0.15968802571296692, 'timestamp': '2025-09-30 22:18:58.165387', 'step': 5620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:58.210195', 'step': 5620, 'epoch': 1} {'type': 'loss', 'content': 0.272026389837265, 'timestamp': '2025-09-30 22:18:58.213284', 'step': 5621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:58.250169', 'step': 5621, 'epoch': 1} {'type': 'loss', 'content': 0.16243301331996918, 'timestamp': '2025-09-30 22:18:58.252966', 'step': 5622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:58.297934', 'step': 5622, 'epoch': 1} {'type': 'loss', 'content': 0.14306843280792236, 'timestamp': '2025-09-30 22:18:58.302059', 'step': 5623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:58.336148', 'step': 5623, 'epoch': 1} {'type': 'loss', 'content': 0.17128407955169678, 'timestamp': '2025-09-30 22:18:58.362588', 'step': 5624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.399405', 'step': 5624, 'epoch': 1} {'type': 'loss', 'content': 0.2038179188966751, 'timestamp': '2025-09-30 22:18:58.406467', 'step': 5625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.448206', 'step': 5625, 'epoch': 1} {'type': 'loss', 'content': 0.1934003233909607, 'timestamp': '2025-09-30 22:18:58.451437', 'step': 5626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.491984', 'step': 5626, 'epoch': 1} {'type': 'loss', 'content': 0.32213926315307617, 'timestamp': '2025-09-30 22:18:58.495787', 'step': 5627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.529556', 'step': 5627, 'epoch': 1} {'type': 'loss', 'content': 0.16357123851776123, 'timestamp': '2025-09-30 22:18:58.553753', 'step': 5628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.603154', 'step': 5628, 'epoch': 1} {'type': 'loss', 'content': 0.18598254024982452, 'timestamp': '2025-09-30 22:18:58.609941', 'step': 5629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.645597', 'step': 5629, 'epoch': 1} {'type': 'loss', 'content': 0.22269946336746216, 'timestamp': '2025-09-30 22:18:58.651711', 'step': 5630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:58.691333', 'step': 5630, 'epoch': 1} {'type': 'loss', 'content': 0.1697312444448471, 'timestamp': '2025-09-30 22:18:58.698384', 'step': 5631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:58.731677', 'step': 5631, 'epoch': 1} {'type': 'loss', 'content': 0.18316997587680817, 'timestamp': '2025-09-30 22:18:58.755616', 'step': 5632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:58.790110', 'step': 5632, 'epoch': 1} {'type': 'loss', 'content': 0.1659851372241974, 'timestamp': '2025-09-30 22:18:58.793041', 'step': 5633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.832349', 'step': 5633, 'epoch': 1} {'type': 'loss', 'content': 0.14910705387592316, 'timestamp': '2025-09-30 22:18:58.835051', 'step': 5634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:58.866632', 'step': 5634, 'epoch': 1} {'type': 'loss', 'content': 0.2036271095275879, 'timestamp': '2025-09-30 22:18:58.869012', 'step': 5635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:58.899659', 'step': 5635, 'epoch': 1} {'type': 'loss', 'content': 0.20373643934726715, 'timestamp': '2025-09-30 22:18:58.926693', 'step': 5636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:58.957530', 'step': 5636, 'epoch': 1} {'type': 'loss', 'content': 0.1570892035961151, 'timestamp': '2025-09-30 22:18:58.963345', 'step': 5637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:58.996026', 'step': 5637, 'epoch': 1} {'type': 'loss', 'content': 0.13020773231983185, 'timestamp': '2025-09-30 22:18:59.000339', 'step': 5638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.031518', 'step': 5638, 'epoch': 1} {'type': 'loss', 'content': 0.1876729279756546, 'timestamp': '2025-09-30 22:18:59.035355', 'step': 5639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:59.071749', 'step': 5639, 'epoch': 1} {'type': 'loss', 'content': 0.1737123280763626, 'timestamp': '2025-09-30 22:18:59.095950', 'step': 5640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.128059', 'step': 5640, 'epoch': 1} {'type': 'loss', 'content': 0.15257161855697632, 'timestamp': '2025-09-30 22:18:59.130661', 'step': 5641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:59.161639', 'step': 5641, 'epoch': 1} {'type': 'loss', 'content': 0.24329985678195953, 'timestamp': '2025-09-30 22:18:59.164932', 'step': 5642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:18:59.196086', 'step': 5642, 'epoch': 1} {'type': 'loss', 'content': 0.2514432370662689, 'timestamp': '2025-09-30 22:18:59.203516', 'step': 5643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.237583', 'step': 5643, 'epoch': 1} {'type': 'loss', 'content': 0.163013756275177, 'timestamp': '2025-09-30 22:18:59.263983', 'step': 5644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.295434', 'step': 5644, 'epoch': 1} {'type': 'loss', 'content': 0.15589049458503723, 'timestamp': '2025-09-30 22:18:59.297743', 'step': 5645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.331424', 'step': 5645, 'epoch': 1} {'type': 'loss', 'content': 0.17259956896305084, 'timestamp': '2025-09-30 22:18:59.334574', 'step': 5646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:59.367001', 'step': 5646, 'epoch': 1} {'type': 'loss', 'content': 0.0859665721654892, 'timestamp': '2025-09-30 22:18:59.374415', 'step': 5647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:18:59.406497', 'step': 5647, 'epoch': 1} {'type': 'loss', 'content': 0.2190266251564026, 'timestamp': '2025-09-30 22:18:59.432867', 'step': 5648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:59.467583', 'step': 5648, 'epoch': 1} {'type': 'loss', 'content': 0.15801864862442017, 'timestamp': '2025-09-30 22:18:59.479522', 'step': 5649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:59.510804', 'step': 5649, 'epoch': 1} {'type': 'loss', 'content': 0.14745955169200897, 'timestamp': '2025-09-30 22:18:59.514323', 'step': 5650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:59.547163', 'step': 5650, 'epoch': 1} {'type': 'loss', 'content': 0.1295727640390396, 'timestamp': '2025-09-30 22:18:59.557914', 'step': 5651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.599628', 'step': 5651, 'epoch': 1} {'type': 'loss', 'content': 0.045579954981803894, 'timestamp': '2025-09-30 22:18:59.624089', 'step': 5652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.663152', 'step': 5652, 'epoch': 1} {'type': 'loss', 'content': 0.11070206016302109, 'timestamp': '2025-09-30 22:18:59.670408', 'step': 5653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:18:59.701745', 'step': 5653, 'epoch': 1} {'type': 'loss', 'content': 0.1961681991815567, 'timestamp': '2025-09-30 22:18:59.704557', 'step': 5654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.741326', 'step': 5654, 'epoch': 1} {'type': 'loss', 'content': 0.15252216160297394, 'timestamp': '2025-09-30 22:18:59.747036', 'step': 5655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:59.778629', 'step': 5655, 'epoch': 1} {'type': 'loss', 'content': 0.18032653629779816, 'timestamp': '2025-09-30 22:18:59.808477', 'step': 5656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:18:59.840412', 'step': 5656, 'epoch': 1} {'type': 'loss', 'content': 0.14523644745349884, 'timestamp': '2025-09-30 22:18:59.847352', 'step': 5657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:18:59.878369', 'step': 5657, 'epoch': 1} {'type': 'loss', 'content': 0.25094544887542725, 'timestamp': '2025-09-30 22:18:59.881712', 'step': 5658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:59.912050', 'step': 5658, 'epoch': 1} {'type': 'loss', 'content': 0.173031747341156, 'timestamp': '2025-09-30 22:18:59.917857', 'step': 5659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:18:59.948885', 'step': 5659, 'epoch': 1} {'type': 'loss', 'content': 0.09973276406526566, 'timestamp': '2025-09-30 22:18:59.972584', 'step': 5660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.002457', 'step': 5660, 'epoch': 1} {'type': 'loss', 'content': 0.16627345979213715, 'timestamp': '2025-09-30 22:19:00.005536', 'step': 5661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:00.040690', 'step': 5661, 'epoch': 1} {'type': 'loss', 'content': 0.174180269241333, 'timestamp': '2025-09-30 22:19:00.050231', 'step': 5662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:00.087237', 'step': 5662, 'epoch': 1} {'type': 'loss', 'content': 0.10814262181520462, 'timestamp': '2025-09-30 22:19:00.090326', 'step': 5663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.127386', 'step': 5663, 'epoch': 1} {'type': 'loss', 'content': 0.12205544859170914, 'timestamp': '2025-09-30 22:19:00.152136', 'step': 5664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.185315', 'step': 5664, 'epoch': 1} {'type': 'loss', 'content': 0.114386186003685, 'timestamp': '2025-09-30 22:19:00.188960', 'step': 5665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:00.221555', 'step': 5665, 'epoch': 1} {'type': 'loss', 'content': 0.07962726056575775, 'timestamp': '2025-09-30 22:19:00.225892', 'step': 5666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.256561', 'step': 5666, 'epoch': 1} {'type': 'loss', 'content': 0.10562453418970108, 'timestamp': '2025-09-30 22:19:00.259473', 'step': 5667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.290831', 'step': 5667, 'epoch': 1} {'type': 'loss', 'content': 0.12974561750888824, 'timestamp': '2025-09-30 22:19:00.319403', 'step': 5668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:19:00.355954', 'step': 5668, 'epoch': 1} {'type': 'loss', 'content': 0.14221049845218658, 'timestamp': '2025-09-30 22:19:00.363025', 'step': 5669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.399941', 'step': 5669, 'epoch': 1} {'type': 'loss', 'content': 0.07312721014022827, 'timestamp': '2025-09-30 22:19:00.403173', 'step': 5670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:00.434081', 'step': 5670, 'epoch': 1} {'type': 'loss', 'content': 0.15343399345874786, 'timestamp': '2025-09-30 22:19:00.436679', 'step': 5671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.472700', 'step': 5671, 'epoch': 1} {'type': 'loss', 'content': 0.1490149348974228, 'timestamp': '2025-09-30 22:19:00.497824', 'step': 5672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:00.528524', 'step': 5672, 'epoch': 1} {'type': 'loss', 'content': 0.23786289989948273, 'timestamp': '2025-09-30 22:19:00.536534', 'step': 5673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.567882', 'step': 5673, 'epoch': 1} {'type': 'loss', 'content': 0.13114823400974274, 'timestamp': '2025-09-30 22:19:00.573530', 'step': 5674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:00.609514', 'step': 5674, 'epoch': 1} {'type': 'loss', 'content': 0.17306241393089294, 'timestamp': '2025-09-30 22:19:00.617346', 'step': 5675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.650657', 'step': 5675, 'epoch': 1} {'type': 'loss', 'content': 0.17359858751296997, 'timestamp': '2025-09-30 22:19:00.674521', 'step': 5676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.709550', 'step': 5676, 'epoch': 1} {'type': 'loss', 'content': 0.136282816529274, 'timestamp': '2025-09-30 22:19:00.712067', 'step': 5677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.743843', 'step': 5677, 'epoch': 1} {'type': 'loss', 'content': 0.10901912301778793, 'timestamp': '2025-09-30 22:19:00.752758', 'step': 5678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:00.787431', 'step': 5678, 'epoch': 1} {'type': 'loss', 'content': 0.07767001539468765, 'timestamp': '2025-09-30 22:19:00.790449', 'step': 5679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.821938', 'step': 5679, 'epoch': 1} {'type': 'loss', 'content': 0.14930424094200134, 'timestamp': '2025-09-30 22:19:00.845989', 'step': 5680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:00.878211', 'step': 5680, 'epoch': 1} {'type': 'loss', 'content': 0.19927586615085602, 'timestamp': '2025-09-30 22:19:00.881340', 'step': 5681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:00.911752', 'step': 5681, 'epoch': 1} {'type': 'loss', 'content': 0.15727533400058746, 'timestamp': '2025-09-30 22:19:00.924319', 'step': 5682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:00.966329', 'step': 5682, 'epoch': 1} {'type': 'loss', 'content': 0.15509460866451263, 'timestamp': '2025-09-30 22:19:00.969590', 'step': 5683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:01.016275', 'step': 5683, 'epoch': 1} {'type': 'loss', 'content': 0.25393638014793396, 'timestamp': '2025-09-30 22:19:01.042366', 'step': 5684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:01.076698', 'step': 5684, 'epoch': 1} {'type': 'loss', 'content': 0.15437471866607666, 'timestamp': '2025-09-30 22:19:01.085622', 'step': 5685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:01.122824', 'step': 5685, 'epoch': 1} {'type': 'loss', 'content': 0.18747150897979736, 'timestamp': '2025-09-30 22:19:01.126128', 'step': 5686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:01.161670', 'step': 5686, 'epoch': 1} {'type': 'loss', 'content': 0.14402760565280914, 'timestamp': '2025-09-30 22:19:01.164698', 'step': 5687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:01.198980', 'step': 5687, 'epoch': 1} {'type': 'loss', 'content': 0.20400747656822205, 'timestamp': '2025-09-30 22:19:01.227491', 'step': 5688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:01.258370', 'step': 5688, 'epoch': 1} {'type': 'loss', 'content': 0.15638676285743713, 'timestamp': '2025-09-30 22:19:01.261368', 'step': 5689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:01.294557', 'step': 5689, 'epoch': 1} {'type': 'loss', 'content': 0.19890806078910828, 'timestamp': '2025-09-30 22:19:01.301883', 'step': 5690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:01.336351', 'step': 5690, 'epoch': 1} {'type': 'loss', 'content': 0.22195479273796082, 'timestamp': '2025-09-30 22:19:01.339657', 'step': 5691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:01.374738', 'step': 5691, 'epoch': 1} {'type': 'loss', 'content': 0.14208096265792847, 'timestamp': '2025-09-30 22:19:01.405703', 'step': 5692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:01.436459', 'step': 5692, 'epoch': 1} {'type': 'loss', 'content': 0.11627038568258286, 'timestamp': '2025-09-30 22:19:01.439437', 'step': 5693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:01.479597', 'step': 5693, 'epoch': 1} {'type': 'loss', 'content': 0.24981001019477844, 'timestamp': '2025-09-30 22:19:01.486534', 'step': 5694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:01.523799', 'step': 5694, 'epoch': 1} {'type': 'loss', 'content': 0.140231192111969, 'timestamp': '2025-09-30 22:19:01.533452', 'step': 5695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:01.569322', 'step': 5695, 'epoch': 1} {'type': 'loss', 'content': 0.25678199529647827, 'timestamp': '2025-09-30 22:19:01.597982', 'step': 5696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:01.628300', 'step': 5696, 'epoch': 1} {'type': 'loss', 'content': 0.11763892322778702, 'timestamp': '2025-09-30 22:19:01.631102', 'step': 5697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:01.661393', 'step': 5697, 'epoch': 1} {'type': 'loss', 'content': 0.19048656523227692, 'timestamp': '2025-09-30 22:19:01.667617', 'step': 5698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:01.702046', 'step': 5698, 'epoch': 1} {'type': 'loss', 'content': 0.1217002421617508, 'timestamp': '2025-09-30 22:19:01.707371', 'step': 5699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:01.738472', 'step': 5699, 'epoch': 1} {'type': 'loss', 'content': 0.0845998153090477, 'timestamp': '2025-09-30 22:19:01.762806', 'step': 5700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:01.793985', 'step': 5700, 'epoch': 1} {'type': 'loss', 'content': 0.1899840235710144, 'timestamp': '2025-09-30 22:19:01.797002', 'step': 5701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:01.830330', 'step': 5701, 'epoch': 1} {'type': 'loss', 'content': 0.12851732969284058, 'timestamp': '2025-09-30 22:19:01.832957', 'step': 5702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:01.867994', 'step': 5702, 'epoch': 1} {'type': 'loss', 'content': 0.1738765388727188, 'timestamp': '2025-09-30 22:19:01.870871', 'step': 5703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:01.903499', 'step': 5703, 'epoch': 1} {'type': 'loss', 'content': 0.11776619404554367, 'timestamp': '2025-09-30 22:19:01.929550', 'step': 5704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:01.968270', 'step': 5704, 'epoch': 1} {'type': 'loss', 'content': 0.15717542171478271, 'timestamp': '2025-09-30 22:19:01.971686', 'step': 5705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:02.009871', 'step': 5705, 'epoch': 1} {'type': 'loss', 'content': 0.14758199453353882, 'timestamp': '2025-09-30 22:19:02.013208', 'step': 5706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:02.045524', 'step': 5706, 'epoch': 1} {'type': 'loss', 'content': 0.1356617957353592, 'timestamp': '2025-09-30 22:19:02.054902', 'step': 5707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.086175', 'step': 5707, 'epoch': 1} {'type': 'loss', 'content': 0.14940474927425385, 'timestamp': '2025-09-30 22:19:02.117612', 'step': 5708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:02.150413', 'step': 5708, 'epoch': 1} {'type': 'loss', 'content': 0.22274287045001984, 'timestamp': '2025-09-30 22:19:02.156462', 'step': 5709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.191129', 'step': 5709, 'epoch': 1} {'type': 'loss', 'content': 0.20612984895706177, 'timestamp': '2025-09-30 22:19:02.196972', 'step': 5710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:02.227915', 'step': 5710, 'epoch': 1} {'type': 'loss', 'content': 0.15444447100162506, 'timestamp': '2025-09-30 22:19:02.233471', 'step': 5711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:02.269032', 'step': 5711, 'epoch': 1} {'type': 'loss', 'content': 0.07772600650787354, 'timestamp': '2025-09-30 22:19:02.295711', 'step': 5712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:02.332190', 'step': 5712, 'epoch': 1} {'type': 'loss', 'content': 0.14895911514759064, 'timestamp': '2025-09-30 22:19:02.337543', 'step': 5713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.371765', 'step': 5713, 'epoch': 1} {'type': 'loss', 'content': 0.1824583113193512, 'timestamp': '2025-09-30 22:19:02.378496', 'step': 5714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.412107', 'step': 5714, 'epoch': 1} {'type': 'loss', 'content': 0.1591905802488327, 'timestamp': '2025-09-30 22:19:02.417692', 'step': 5715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.448180', 'step': 5715, 'epoch': 1} {'type': 'loss', 'content': 0.10843565315008163, 'timestamp': '2025-09-30 22:19:02.472481', 'step': 5716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:02.506006', 'step': 5716, 'epoch': 1} {'type': 'loss', 'content': 0.1997757852077484, 'timestamp': '2025-09-30 22:19:02.509223', 'step': 5717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.542870', 'step': 5717, 'epoch': 1} {'type': 'loss', 'content': 0.10667108744382858, 'timestamp': '2025-09-30 22:19:02.545612', 'step': 5718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.580868', 'step': 5718, 'epoch': 1} {'type': 'loss', 'content': 0.11253853887319565, 'timestamp': '2025-09-30 22:19:02.584335', 'step': 5719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:02.618332', 'step': 5719, 'epoch': 1} {'type': 'loss', 'content': 0.25601375102996826, 'timestamp': '2025-09-30 22:19:02.647105', 'step': 5720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.677041', 'step': 5720, 'epoch': 1} {'type': 'loss', 'content': 0.1364934891462326, 'timestamp': '2025-09-30 22:19:02.680216', 'step': 5721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.710885', 'step': 5721, 'epoch': 1} {'type': 'loss', 'content': 0.13316792249679565, 'timestamp': '2025-09-30 22:19:02.718378', 'step': 5722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.751165', 'step': 5722, 'epoch': 1} {'type': 'loss', 'content': 0.149660125374794, 'timestamp': '2025-09-30 22:19:02.756884', 'step': 5723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.789860', 'step': 5723, 'epoch': 1} {'type': 'loss', 'content': 0.0965338796377182, 'timestamp': '2025-09-30 22:19:02.817018', 'step': 5724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:02.847853', 'step': 5724, 'epoch': 1} {'type': 'loss', 'content': 0.13906562328338623, 'timestamp': '2025-09-30 22:19:02.852745', 'step': 5725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.886138', 'step': 5725, 'epoch': 1} {'type': 'loss', 'content': 0.18827344477176666, 'timestamp': '2025-09-30 22:19:02.889312', 'step': 5726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:02.920988', 'step': 5726, 'epoch': 1} {'type': 'loss', 'content': 0.1133425310254097, 'timestamp': '2025-09-30 22:19:02.923833', 'step': 5727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:02.954030', 'step': 5727, 'epoch': 1} {'type': 'loss', 'content': 0.10555139183998108, 'timestamp': '2025-09-30 22:19:02.977747', 'step': 5728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.008082', 'step': 5728, 'epoch': 1} {'type': 'loss', 'content': 0.14382480084896088, 'timestamp': '2025-09-30 22:19:03.016329', 'step': 5729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.047240', 'step': 5729, 'epoch': 1} {'type': 'loss', 'content': 0.14085932075977325, 'timestamp': '2025-09-30 22:19:03.052888', 'step': 5730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:03.086854', 'step': 5730, 'epoch': 1} {'type': 'loss', 'content': 0.07696085423231125, 'timestamp': '2025-09-30 22:19:03.089925', 'step': 5731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:03.121464', 'step': 5731, 'epoch': 1} {'type': 'loss', 'content': 0.11814727634191513, 'timestamp': '2025-09-30 22:19:03.145910', 'step': 5732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.184773', 'step': 5732, 'epoch': 1} {'type': 'loss', 'content': 0.17020410299301147, 'timestamp': '2025-09-30 22:19:03.194117', 'step': 5733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:03.225125', 'step': 5733, 'epoch': 1} {'type': 'loss', 'content': 0.09747479110956192, 'timestamp': '2025-09-30 22:19:03.227732', 'step': 5734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:03.260276', 'step': 5734, 'epoch': 1} {'type': 'loss', 'content': 0.12424702197313309, 'timestamp': '2025-09-30 22:19:03.276216', 'step': 5735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:03.312783', 'step': 5735, 'epoch': 1} {'type': 'loss', 'content': 0.13990096747875214, 'timestamp': '2025-09-30 22:19:03.343246', 'step': 5736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:03.380361', 'step': 5736, 'epoch': 1} {'type': 'loss', 'content': 0.17771084606647491, 'timestamp': '2025-09-30 22:19:03.383634', 'step': 5737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:03.421656', 'step': 5737, 'epoch': 1} {'type': 'loss', 'content': 0.11681627482175827, 'timestamp': '2025-09-30 22:19:03.427214', 'step': 5738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.458937', 'step': 5738, 'epoch': 1} {'type': 'loss', 'content': 0.12307240813970566, 'timestamp': '2025-09-30 22:19:03.465320', 'step': 5739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:03.495603', 'step': 5739, 'epoch': 1} {'type': 'loss', 'content': 0.13868962228298187, 'timestamp': '2025-09-30 22:19:03.523370', 'step': 5740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.554344', 'step': 5740, 'epoch': 1} {'type': 'loss', 'content': 0.10877414792776108, 'timestamp': '2025-09-30 22:19:03.559735', 'step': 5741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:03.590418', 'step': 5741, 'epoch': 1} {'type': 'loss', 'content': 0.15506260097026825, 'timestamp': '2025-09-30 22:19:03.593434', 'step': 5742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.624277', 'step': 5742, 'epoch': 1} {'type': 'loss', 'content': 0.11304329335689545, 'timestamp': '2025-09-30 22:19:03.627806', 'step': 5743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:03.662643', 'step': 5743, 'epoch': 1} {'type': 'loss', 'content': 0.11250033229589462, 'timestamp': '2025-09-30 22:19:03.687189', 'step': 5744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.717148', 'step': 5744, 'epoch': 1} {'type': 'loss', 'content': 0.10410290211439133, 'timestamp': '2025-09-30 22:19:03.721719', 'step': 5745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.755596', 'step': 5745, 'epoch': 1} {'type': 'loss', 'content': 0.21649958193302155, 'timestamp': '2025-09-30 22:19:03.758479', 'step': 5746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.791543', 'step': 5746, 'epoch': 1} {'type': 'loss', 'content': 0.15919575095176697, 'timestamp': '2025-09-30 22:19:03.794193', 'step': 5747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:03.824945', 'step': 5747, 'epoch': 1} {'type': 'loss', 'content': 0.15294848382472992, 'timestamp': '2025-09-30 22:19:03.861431', 'step': 5748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.892943', 'step': 5748, 'epoch': 1} {'type': 'loss', 'content': 0.2179637849330902, 'timestamp': '2025-09-30 22:19:03.900670', 'step': 5749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.936016', 'step': 5749, 'epoch': 1} {'type': 'loss', 'content': 0.1329696923494339, 'timestamp': '2025-09-30 22:19:03.938434', 'step': 5750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:03.969045', 'step': 5750, 'epoch': 1} {'type': 'loss', 'content': 0.15140117704868317, 'timestamp': '2025-09-30 22:19:03.972609', 'step': 5751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.005493', 'step': 5751, 'epoch': 1} {'type': 'loss', 'content': 0.10394654422998428, 'timestamp': '2025-09-30 22:19:04.033040', 'step': 5752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:04.067112', 'step': 5752, 'epoch': 1} {'type': 'loss', 'content': 0.0690070241689682, 'timestamp': '2025-09-30 22:19:04.071184', 'step': 5753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:04.110588', 'step': 5753, 'epoch': 1} {'type': 'loss', 'content': 0.10587494820356369, 'timestamp': '2025-09-30 22:19:04.121736', 'step': 5754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:04.153262', 'step': 5754, 'epoch': 1} {'type': 'loss', 'content': 0.13002929091453552, 'timestamp': '2025-09-30 22:19:04.156184', 'step': 5755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.187029', 'step': 5755, 'epoch': 1} {'type': 'loss', 'content': 0.11579263210296631, 'timestamp': '2025-09-30 22:19:04.212913', 'step': 5756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:04.251095', 'step': 5756, 'epoch': 1} {'type': 'loss', 'content': 0.1459825038909912, 'timestamp': '2025-09-30 22:19:04.258539', 'step': 5757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:04.298639', 'step': 5757, 'epoch': 1} {'type': 'loss', 'content': 0.1800628900527954, 'timestamp': '2025-09-30 22:19:04.303777', 'step': 5758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:04.339956', 'step': 5758, 'epoch': 1} {'type': 'loss', 'content': 0.11914397776126862, 'timestamp': '2025-09-30 22:19:04.344792', 'step': 5759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.377307', 'step': 5759, 'epoch': 1} {'type': 'loss', 'content': 0.09935979545116425, 'timestamp': '2025-09-30 22:19:04.404596', 'step': 5760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.435463', 'step': 5760, 'epoch': 1} {'type': 'loss', 'content': 0.14808715879917145, 'timestamp': '2025-09-30 22:19:04.440593', 'step': 5761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:04.471537', 'step': 5761, 'epoch': 1} {'type': 'loss', 'content': 0.14769011735916138, 'timestamp': '2025-09-30 22:19:04.477790', 'step': 5762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.518198', 'step': 5762, 'epoch': 1} {'type': 'loss', 'content': 0.05982881784439087, 'timestamp': '2025-09-30 22:19:04.521001', 'step': 5763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.551557', 'step': 5763, 'epoch': 1} {'type': 'loss', 'content': 0.12898094952106476, 'timestamp': '2025-09-30 22:19:04.579550', 'step': 5764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.610625', 'step': 5764, 'epoch': 1} {'type': 'loss', 'content': 0.34364455938339233, 'timestamp': '2025-09-30 22:19:04.614432', 'step': 5765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:04.660378', 'step': 5765, 'epoch': 1} {'type': 'loss', 'content': 0.17338788509368896, 'timestamp': '2025-09-30 22:19:04.665624', 'step': 5766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:04.699038', 'step': 5766, 'epoch': 1} {'type': 'loss', 'content': 0.17152808606624603, 'timestamp': '2025-09-30 22:19:04.701758', 'step': 5767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:04.740826', 'step': 5767, 'epoch': 1} {'type': 'loss', 'content': 0.17138643562793732, 'timestamp': '2025-09-30 22:19:04.765778', 'step': 5768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:04.812593', 'step': 5768, 'epoch': 1} {'type': 'loss', 'content': 0.12701480090618134, 'timestamp': '2025-09-30 22:19:04.818290', 'step': 5769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:04.851964', 'step': 5769, 'epoch': 1} {'type': 'loss', 'content': 0.1650453507900238, 'timestamp': '2025-09-30 22:19:04.854274', 'step': 5770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:04.890594', 'step': 5770, 'epoch': 1} {'type': 'loss', 'content': 0.22841432690620422, 'timestamp': '2025-09-30 22:19:04.896064', 'step': 5771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:04.933231', 'step': 5771, 'epoch': 1} {'type': 'loss', 'content': 0.3019736409187317, 'timestamp': '2025-09-30 22:19:04.960075', 'step': 5772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.000224', 'step': 5772, 'epoch': 1} {'type': 'loss', 'content': 0.22677628695964813, 'timestamp': '2025-09-30 22:19:05.005810', 'step': 5773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:05.041489', 'step': 5773, 'epoch': 1} {'type': 'loss', 'content': 0.1513652205467224, 'timestamp': '2025-09-30 22:19:05.047763', 'step': 5774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.087227', 'step': 5774, 'epoch': 1} {'type': 'loss', 'content': 0.190944641828537, 'timestamp': '2025-09-30 22:19:05.096380', 'step': 5775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:05.134788', 'step': 5775, 'epoch': 1} {'type': 'loss', 'content': 0.1303987056016922, 'timestamp': '2025-09-30 22:19:05.158914', 'step': 5776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:05.191348', 'step': 5776, 'epoch': 1} {'type': 'loss', 'content': 0.11750417947769165, 'timestamp': '2025-09-30 22:19:05.195115', 'step': 5777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:05.224799', 'step': 5777, 'epoch': 1} {'type': 'loss', 'content': 0.13961778581142426, 'timestamp': '2025-09-30 22:19:05.229630', 'step': 5778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.262095', 'step': 5778, 'epoch': 1} {'type': 'loss', 'content': 0.20854561030864716, 'timestamp': '2025-09-30 22:19:05.264595', 'step': 5779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:05.299070', 'step': 5779, 'epoch': 1} {'type': 'loss', 'content': 0.18651282787322998, 'timestamp': '2025-09-30 22:19:05.323301', 'step': 5780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.358348', 'step': 5780, 'epoch': 1} {'type': 'loss', 'content': 0.16719767451286316, 'timestamp': '2025-09-30 22:19:05.363131', 'step': 5781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.398671', 'step': 5781, 'epoch': 1} {'type': 'loss', 'content': 0.09115071594715118, 'timestamp': '2025-09-30 22:19:05.404506', 'step': 5782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.434947', 'step': 5782, 'epoch': 1} {'type': 'loss', 'content': 0.1882910430431366, 'timestamp': '2025-09-30 22:19:05.441583', 'step': 5783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.473673', 'step': 5783, 'epoch': 1} {'type': 'loss', 'content': 0.1296369880437851, 'timestamp': '2025-09-30 22:19:05.501591', 'step': 5784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.534432', 'step': 5784, 'epoch': 1} {'type': 'loss', 'content': 0.1834023892879486, 'timestamp': '2025-09-30 22:19:05.539335', 'step': 5785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:05.574743', 'step': 5785, 'epoch': 1} {'type': 'loss', 'content': 0.20406591892242432, 'timestamp': '2025-09-30 22:19:05.577443', 'step': 5786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.611563', 'step': 5786, 'epoch': 1} {'type': 'loss', 'content': 0.21537446975708008, 'timestamp': '2025-09-30 22:19:05.616699', 'step': 5787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.650706', 'step': 5787, 'epoch': 1} {'type': 'loss', 'content': 0.07629165053367615, 'timestamp': '2025-09-30 22:19:05.674611', 'step': 5788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.707039', 'step': 5788, 'epoch': 1} {'type': 'loss', 'content': 0.1659199744462967, 'timestamp': '2025-09-30 22:19:05.709995', 'step': 5789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.742287', 'step': 5789, 'epoch': 1} {'type': 'loss', 'content': 0.19459865987300873, 'timestamp': '2025-09-30 22:19:05.745252', 'step': 5790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.777012', 'step': 5790, 'epoch': 1} {'type': 'loss', 'content': 0.1451108455657959, 'timestamp': '2025-09-30 22:19:05.779551', 'step': 5791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.823941', 'step': 5791, 'epoch': 1} {'type': 'loss', 'content': 0.14100022614002228, 'timestamp': '2025-09-30 22:19:05.847870', 'step': 5792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.886381', 'step': 5792, 'epoch': 1} {'type': 'loss', 'content': 0.17134012281894684, 'timestamp': '2025-09-30 22:19:05.891129', 'step': 5793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:05.922219', 'step': 5793, 'epoch': 1} {'type': 'loss', 'content': 0.17626190185546875, 'timestamp': '2025-09-30 22:19:05.926559', 'step': 5794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:05.959119', 'step': 5794, 'epoch': 1} {'type': 'loss', 'content': 0.18143554031848907, 'timestamp': '2025-09-30 22:19:05.962404', 'step': 5795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:05.992511', 'step': 5795, 'epoch': 1} {'type': 'loss', 'content': 0.22980932891368866, 'timestamp': '2025-09-30 22:19:06.018611', 'step': 5796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.051251', 'step': 5796, 'epoch': 1} {'type': 'loss', 'content': 0.1214405745267868, 'timestamp': '2025-09-30 22:19:06.057030', 'step': 5797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:06.091451', 'step': 5797, 'epoch': 1} {'type': 'loss', 'content': 0.06975552439689636, 'timestamp': '2025-09-30 22:19:06.095113', 'step': 5798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.128712', 'step': 5798, 'epoch': 1} {'type': 'loss', 'content': 0.11115428060293198, 'timestamp': '2025-09-30 22:19:06.133889', 'step': 5799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:06.164084', 'step': 5799, 'epoch': 1} {'type': 'loss', 'content': 0.1346806138753891, 'timestamp': '2025-09-30 22:19:06.191373', 'step': 5800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.231512', 'step': 5800, 'epoch': 1} {'type': 'loss', 'content': 0.2599027156829834, 'timestamp': '2025-09-30 22:19:06.235309', 'step': 5801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:06.269766', 'step': 5801, 'epoch': 1} {'type': 'loss', 'content': 0.2536490559577942, 'timestamp': '2025-09-30 22:19:06.272988', 'step': 5802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:06.305132', 'step': 5802, 'epoch': 1} {'type': 'loss', 'content': 0.05067434534430504, 'timestamp': '2025-09-30 22:19:06.313894', 'step': 5803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.359911', 'step': 5803, 'epoch': 1} {'type': 'loss', 'content': 0.11795765161514282, 'timestamp': '2025-09-30 22:19:06.390012', 'step': 5804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.422542', 'step': 5804, 'epoch': 1} {'type': 'loss', 'content': 0.11001978814601898, 'timestamp': '2025-09-30 22:19:06.431347', 'step': 5805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:06.468888', 'step': 5805, 'epoch': 1} {'type': 'loss', 'content': 0.19972236454486847, 'timestamp': '2025-09-30 22:19:06.478526', 'step': 5806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:06.519263', 'step': 5806, 'epoch': 1} {'type': 'loss', 'content': 0.14284829795360565, 'timestamp': '2025-09-30 22:19:06.528248', 'step': 5807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:06.565963', 'step': 5807, 'epoch': 1} {'type': 'loss', 'content': 0.19300492107868195, 'timestamp': '2025-09-30 22:19:06.591465', 'step': 5808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:06.623355', 'step': 5808, 'epoch': 1} {'type': 'loss', 'content': 0.14706042408943176, 'timestamp': '2025-09-30 22:19:06.631503', 'step': 5809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.668847', 'step': 5809, 'epoch': 1} {'type': 'loss', 'content': 0.09907828271389008, 'timestamp': '2025-09-30 22:19:06.679637', 'step': 5810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.718229', 'step': 5810, 'epoch': 1} {'type': 'loss', 'content': 0.06117848679423332, 'timestamp': '2025-09-30 22:19:06.721812', 'step': 5811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:06.754344', 'step': 5811, 'epoch': 1} {'type': 'loss', 'content': 0.1164357140660286, 'timestamp': '2025-09-30 22:19:06.778472', 'step': 5812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:06.815410', 'step': 5812, 'epoch': 1} {'type': 'loss', 'content': 0.1710817515850067, 'timestamp': '2025-09-30 22:19:06.829375', 'step': 5813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.860853', 'step': 5813, 'epoch': 1} {'type': 'loss', 'content': 0.1658766120672226, 'timestamp': '2025-09-30 22:19:06.863611', 'step': 5814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:06.908349', 'step': 5814, 'epoch': 1} {'type': 'loss', 'content': 0.16305531561374664, 'timestamp': '2025-09-30 22:19:06.911461', 'step': 5815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:06.942571', 'step': 5815, 'epoch': 1} {'type': 'loss', 'content': 0.14378753304481506, 'timestamp': '2025-09-30 22:19:06.967366', 'step': 5816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:06.997942', 'step': 5816, 'epoch': 1} {'type': 'loss', 'content': 0.13127519190311432, 'timestamp': '2025-09-30 22:19:07.006627', 'step': 5817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:07.038368', 'step': 5817, 'epoch': 1} {'type': 'loss', 'content': 0.15079978108406067, 'timestamp': '2025-09-30 22:19:07.042056', 'step': 5818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:07.080982', 'step': 5818, 'epoch': 1} {'type': 'loss', 'content': 0.07154996693134308, 'timestamp': '2025-09-30 22:19:07.090479', 'step': 5819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:07.122615', 'step': 5819, 'epoch': 1} {'type': 'loss', 'content': 0.17771878838539124, 'timestamp': '2025-09-30 22:19:07.147165', 'step': 5820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.187920', 'step': 5820, 'epoch': 1} {'type': 'loss', 'content': 0.13151966035366058, 'timestamp': '2025-09-30 22:19:07.190747', 'step': 5821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:07.230741', 'step': 5821, 'epoch': 1} {'type': 'loss', 'content': 0.20815607905387878, 'timestamp': '2025-09-30 22:19:07.242125', 'step': 5822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:07.280627', 'step': 5822, 'epoch': 1} {'type': 'loss', 'content': 0.12646307051181793, 'timestamp': '2025-09-30 22:19:07.283474', 'step': 5823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.315999', 'step': 5823, 'epoch': 1} {'type': 'loss', 'content': 0.13314960896968842, 'timestamp': '2025-09-30 22:19:07.345521', 'step': 5824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:07.377272', 'step': 5824, 'epoch': 1} {'type': 'loss', 'content': 0.15905532240867615, 'timestamp': '2025-09-30 22:19:07.382005', 'step': 5825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:07.414216', 'step': 5825, 'epoch': 1} {'type': 'loss', 'content': 0.18318922817707062, 'timestamp': '2025-09-30 22:19:07.417654', 'step': 5826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:07.449054', 'step': 5826, 'epoch': 1} {'type': 'loss', 'content': 0.12648683786392212, 'timestamp': '2025-09-30 22:19:07.458506', 'step': 5827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.497675', 'step': 5827, 'epoch': 1} {'type': 'loss', 'content': 0.09168881922960281, 'timestamp': '2025-09-30 22:19:07.528145', 'step': 5828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:07.566901', 'step': 5828, 'epoch': 1} {'type': 'loss', 'content': 0.15152356028556824, 'timestamp': '2025-09-30 22:19:07.570025', 'step': 5829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.615251', 'step': 5829, 'epoch': 1} {'type': 'loss', 'content': 0.14301875233650208, 'timestamp': '2025-09-30 22:19:07.618872', 'step': 5830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.654958', 'step': 5830, 'epoch': 1} {'type': 'loss', 'content': 0.1879199892282486, 'timestamp': '2025-09-30 22:19:07.663846', 'step': 5831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:07.702632', 'step': 5831, 'epoch': 1} {'type': 'loss', 'content': 0.14036527276039124, 'timestamp': '2025-09-30 22:19:07.733258', 'step': 5832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:19:07.766667', 'step': 5832, 'epoch': 1} {'type': 'loss', 'content': 0.21522335708141327, 'timestamp': '2025-09-30 22:19:07.776749', 'step': 5833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.808914', 'step': 5833, 'epoch': 1} {'type': 'loss', 'content': 0.12886182963848114, 'timestamp': '2025-09-30 22:19:07.812322', 'step': 5834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.843460', 'step': 5834, 'epoch': 1} {'type': 'loss', 'content': 0.08061648905277252, 'timestamp': '2025-09-30 22:19:07.849222', 'step': 5835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:07.881686', 'step': 5835, 'epoch': 1} {'type': 'loss', 'content': 0.19434931874275208, 'timestamp': '2025-09-30 22:19:07.905792', 'step': 5836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.938738', 'step': 5836, 'epoch': 1} {'type': 'loss', 'content': 0.10515956580638885, 'timestamp': '2025-09-30 22:19:07.941896', 'step': 5837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:07.972915', 'step': 5837, 'epoch': 1} {'type': 'loss', 'content': 0.16068515181541443, 'timestamp': '2025-09-30 22:19:07.975392', 'step': 5838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.017925', 'step': 5838, 'epoch': 1} {'type': 'loss', 'content': 0.16348394751548767, 'timestamp': '2025-09-30 22:19:08.022391', 'step': 5839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:08.054338', 'step': 5839, 'epoch': 1} {'type': 'loss', 'content': 0.14578846096992493, 'timestamp': '2025-09-30 22:19:08.078363', 'step': 5840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:08.115210', 'step': 5840, 'epoch': 1} {'type': 'loss', 'content': 0.10002485662698746, 'timestamp': '2025-09-30 22:19:08.119285', 'step': 5841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.152940', 'step': 5841, 'epoch': 1} {'type': 'loss', 'content': 0.15631476044654846, 'timestamp': '2025-09-30 22:19:08.157259', 'step': 5842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:08.188664', 'step': 5842, 'epoch': 1} {'type': 'loss', 'content': 0.07537680119276047, 'timestamp': '2025-09-30 22:19:08.190946', 'step': 5843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.225658', 'step': 5843, 'epoch': 1} {'type': 'loss', 'content': 0.11470837891101837, 'timestamp': '2025-09-30 22:19:08.249502', 'step': 5844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:08.281563', 'step': 5844, 'epoch': 1} {'type': 'loss', 'content': 0.13319678604602814, 'timestamp': '2025-09-30 22:19:08.284286', 'step': 5845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:08.318642', 'step': 5845, 'epoch': 1} {'type': 'loss', 'content': 0.17374363541603088, 'timestamp': '2025-09-30 22:19:08.321492', 'step': 5846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:08.363159', 'step': 5846, 'epoch': 1} {'type': 'loss', 'content': 0.23255828022956848, 'timestamp': '2025-09-30 22:19:08.368357', 'step': 5847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:08.400891', 'step': 5847, 'epoch': 1} {'type': 'loss', 'content': 0.18812096118927002, 'timestamp': '2025-09-30 22:19:08.426365', 'step': 5848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:08.458875', 'step': 5848, 'epoch': 1} {'type': 'loss', 'content': 0.20770703256130219, 'timestamp': '2025-09-30 22:19:08.464147', 'step': 5849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.498067', 'step': 5849, 'epoch': 1} {'type': 'loss', 'content': 0.21779204905033112, 'timestamp': '2025-09-30 22:19:08.501061', 'step': 5850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:08.534430', 'step': 5850, 'epoch': 1} {'type': 'loss', 'content': 0.20327894389629364, 'timestamp': '2025-09-30 22:19:08.539049', 'step': 5851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:08.571525', 'step': 5851, 'epoch': 1} {'type': 'loss', 'content': 0.11275816708803177, 'timestamp': '2025-09-30 22:19:08.595283', 'step': 5852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:08.631426', 'step': 5852, 'epoch': 1} {'type': 'loss', 'content': 0.1629677712917328, 'timestamp': '2025-09-30 22:19:08.634453', 'step': 5853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:08.667550', 'step': 5853, 'epoch': 1} {'type': 'loss', 'content': 0.12895427644252777, 'timestamp': '2025-09-30 22:19:08.671282', 'step': 5854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.702342', 'step': 5854, 'epoch': 1} {'type': 'loss', 'content': 0.21855898201465607, 'timestamp': '2025-09-30 22:19:08.708522', 'step': 5855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:08.740711', 'step': 5855, 'epoch': 1} {'type': 'loss', 'content': 0.14662480354309082, 'timestamp': '2025-09-30 22:19:08.765217', 'step': 5856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:08.796786', 'step': 5856, 'epoch': 1} {'type': 'loss', 'content': 0.2600691616535187, 'timestamp': '2025-09-30 22:19:08.804430', 'step': 5857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.839445', 'step': 5857, 'epoch': 1} {'type': 'loss', 'content': 0.14703775942325592, 'timestamp': '2025-09-30 22:19:08.842014', 'step': 5858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.875557', 'step': 5858, 'epoch': 1} {'type': 'loss', 'content': 0.18111445009708405, 'timestamp': '2025-09-30 22:19:08.878205', 'step': 5859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:08.913298', 'step': 5859, 'epoch': 1} {'type': 'loss', 'content': 0.21765582263469696, 'timestamp': '2025-09-30 22:19:08.940576', 'step': 5860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:08.971170', 'step': 5860, 'epoch': 1} {'type': 'loss', 'content': 0.14466005563735962, 'timestamp': '2025-09-30 22:19:08.973610', 'step': 5861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.011729', 'step': 5861, 'epoch': 1} {'type': 'loss', 'content': 0.1784260869026184, 'timestamp': '2025-09-30 22:19:09.017908', 'step': 5862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:09.054456', 'step': 5862, 'epoch': 1} {'type': 'loss', 'content': 0.14794881641864777, 'timestamp': '2025-09-30 22:19:09.057402', 'step': 5863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:09.097399', 'step': 5863, 'epoch': 1} {'type': 'loss', 'content': 0.08346589654684067, 'timestamp': '2025-09-30 22:19:09.123489', 'step': 5864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:09.160717', 'step': 5864, 'epoch': 1} {'type': 'loss', 'content': 0.13504016399383545, 'timestamp': '2025-09-30 22:19:09.163673', 'step': 5865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.194724', 'step': 5865, 'epoch': 1} {'type': 'loss', 'content': 0.16463807225227356, 'timestamp': '2025-09-30 22:19:09.201273', 'step': 5866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.232381', 'step': 5866, 'epoch': 1} {'type': 'loss', 'content': 0.1625676453113556, 'timestamp': '2025-09-30 22:19:09.237824', 'step': 5867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.271116', 'step': 5867, 'epoch': 1} {'type': 'loss', 'content': 0.14129093289375305, 'timestamp': '2025-09-30 22:19:09.295824', 'step': 5868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:09.326959', 'step': 5868, 'epoch': 1} {'type': 'loss', 'content': 0.1601892113685608, 'timestamp': '2025-09-30 22:19:09.336338', 'step': 5869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:09.379693', 'step': 5869, 'epoch': 1} {'type': 'loss', 'content': 0.19536817073822021, 'timestamp': '2025-09-30 22:19:09.382545', 'step': 5870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:09.427057', 'step': 5870, 'epoch': 1} {'type': 'loss', 'content': 0.12520523369312286, 'timestamp': '2025-09-30 22:19:09.430293', 'step': 5871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:09.464956', 'step': 5871, 'epoch': 1} {'type': 'loss', 'content': 0.09331931918859482, 'timestamp': '2025-09-30 22:19:09.492897', 'step': 5872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:09.527718', 'step': 5872, 'epoch': 1} {'type': 'loss', 'content': 0.14722870290279388, 'timestamp': '2025-09-30 22:19:09.530400', 'step': 5873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.565903', 'step': 5873, 'epoch': 1} {'type': 'loss', 'content': 0.10973131656646729, 'timestamp': '2025-09-30 22:19:09.568542', 'step': 5874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:09.599708', 'step': 5874, 'epoch': 1} {'type': 'loss', 'content': 0.16300836205482483, 'timestamp': '2025-09-30 22:19:09.601965', 'step': 5875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:09.632355', 'step': 5875, 'epoch': 1} {'type': 'loss', 'content': 0.15968991816043854, 'timestamp': '2025-09-30 22:19:09.656868', 'step': 5876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:09.690264', 'step': 5876, 'epoch': 1} {'type': 'loss', 'content': 0.10824783891439438, 'timestamp': '2025-09-30 22:19:09.693241', 'step': 5877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.724331', 'step': 5877, 'epoch': 1} {'type': 'loss', 'content': 0.08604483306407928, 'timestamp': '2025-09-30 22:19:09.726686', 'step': 5878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.758101', 'step': 5878, 'epoch': 1} {'type': 'loss', 'content': 0.15751959383487701, 'timestamp': '2025-09-30 22:19:09.765347', 'step': 5879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:09.797095', 'step': 5879, 'epoch': 1} {'type': 'loss', 'content': 0.15135635435581207, 'timestamp': '2025-09-30 22:19:09.825403', 'step': 5880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:09.865416', 'step': 5880, 'epoch': 1} {'type': 'loss', 'content': 0.2552015483379364, 'timestamp': '2025-09-30 22:19:09.868393', 'step': 5881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:09.898857', 'step': 5881, 'epoch': 1} {'type': 'loss', 'content': 0.2020627111196518, 'timestamp': '2025-09-30 22:19:09.905603', 'step': 5882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:09.935663', 'step': 5882, 'epoch': 1} {'type': 'loss', 'content': 0.15255865454673767, 'timestamp': '2025-09-30 22:19:09.938789', 'step': 5883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:09.974091', 'step': 5883, 'epoch': 1} {'type': 'loss', 'content': 0.17248037457466125, 'timestamp': '2025-09-30 22:19:10.002553', 'step': 5884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.037983', 'step': 5884, 'epoch': 1} {'type': 'loss', 'content': 0.058530695736408234, 'timestamp': '2025-09-30 22:19:10.043901', 'step': 5885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:10.078279', 'step': 5885, 'epoch': 1} {'type': 'loss', 'content': 0.20216749608516693, 'timestamp': '2025-09-30 22:19:10.081054', 'step': 5886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:10.114564', 'step': 5886, 'epoch': 1} {'type': 'loss', 'content': 0.1110408753156662, 'timestamp': '2025-09-30 22:19:10.117491', 'step': 5887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:10.148014', 'step': 5887, 'epoch': 1} {'type': 'loss', 'content': 0.16218668222427368, 'timestamp': '2025-09-30 22:19:10.172768', 'step': 5888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.203143', 'step': 5888, 'epoch': 1} {'type': 'loss', 'content': 0.29515281319618225, 'timestamp': '2025-09-30 22:19:10.210010', 'step': 5889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.241583', 'step': 5889, 'epoch': 1} {'type': 'loss', 'content': 0.12412062287330627, 'timestamp': '2025-09-30 22:19:10.245297', 'step': 5890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:10.279996', 'step': 5890, 'epoch': 1} {'type': 'loss', 'content': 0.16339482367038727, 'timestamp': '2025-09-30 22:19:10.286342', 'step': 5891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.321652', 'step': 5891, 'epoch': 1} {'type': 'loss', 'content': 0.10201343894004822, 'timestamp': '2025-09-30 22:19:10.346350', 'step': 5892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.382266', 'step': 5892, 'epoch': 1} {'type': 'loss', 'content': 0.0995521992444992, 'timestamp': '2025-09-30 22:19:10.392921', 'step': 5893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.431529', 'step': 5893, 'epoch': 1} {'type': 'loss', 'content': 0.13706150650978088, 'timestamp': '2025-09-30 22:19:10.438122', 'step': 5894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.469257', 'step': 5894, 'epoch': 1} {'type': 'loss', 'content': 0.22224478423595428, 'timestamp': '2025-09-30 22:19:10.476146', 'step': 5895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:10.511259', 'step': 5895, 'epoch': 1} {'type': 'loss', 'content': 0.16148220002651215, 'timestamp': '2025-09-30 22:19:10.539720', 'step': 5896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.569635', 'step': 5896, 'epoch': 1} {'type': 'loss', 'content': 0.20087063312530518, 'timestamp': '2025-09-30 22:19:10.579169', 'step': 5897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.613986', 'step': 5897, 'epoch': 1} {'type': 'loss', 'content': 0.12501688301563263, 'timestamp': '2025-09-30 22:19:10.620181', 'step': 5898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:10.654177', 'step': 5898, 'epoch': 1} {'type': 'loss', 'content': 0.08359097689390182, 'timestamp': '2025-09-30 22:19:10.659635', 'step': 5899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:10.693620', 'step': 5899, 'epoch': 1} {'type': 'loss', 'content': 0.2145370990037918, 'timestamp': '2025-09-30 22:19:10.720539', 'step': 5900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.753650', 'step': 5900, 'epoch': 1} {'type': 'loss', 'content': 0.11275016516447067, 'timestamp': '2025-09-30 22:19:10.759223', 'step': 5901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.789942', 'step': 5901, 'epoch': 1} {'type': 'loss', 'content': 0.10395815223455429, 'timestamp': '2025-09-30 22:19:10.796611', 'step': 5902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.831181', 'step': 5902, 'epoch': 1} {'type': 'loss', 'content': 0.09917274117469788, 'timestamp': '2025-09-30 22:19:10.839076', 'step': 5903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:10.872724', 'step': 5903, 'epoch': 1} {'type': 'loss', 'content': 0.261454701423645, 'timestamp': '2025-09-30 22:19:10.900298', 'step': 5904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:10.930399', 'step': 5904, 'epoch': 1} {'type': 'loss', 'content': 0.16602738201618195, 'timestamp': '2025-09-30 22:19:10.932918', 'step': 5905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:10.963924', 'step': 5905, 'epoch': 1} {'type': 'loss', 'content': 0.19430294632911682, 'timestamp': '2025-09-30 22:19:10.970298', 'step': 5906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:11.003507', 'step': 5906, 'epoch': 1} {'type': 'loss', 'content': 0.14939923584461212, 'timestamp': '2025-09-30 22:19:11.005932', 'step': 5907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.047331', 'step': 5907, 'epoch': 1} {'type': 'loss', 'content': 0.1718512922525406, 'timestamp': '2025-09-30 22:19:11.071087', 'step': 5908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.102012', 'step': 5908, 'epoch': 1} {'type': 'loss', 'content': 0.14201748371124268, 'timestamp': '2025-09-30 22:19:11.107350', 'step': 5909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:11.138552', 'step': 5909, 'epoch': 1} {'type': 'loss', 'content': 0.14746876060962677, 'timestamp': '2025-09-30 22:19:11.142256', 'step': 5910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.173189', 'step': 5910, 'epoch': 1} {'type': 'loss', 'content': 0.16313008964061737, 'timestamp': '2025-09-30 22:19:11.178598', 'step': 5911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:11.210419', 'step': 5911, 'epoch': 1} {'type': 'loss', 'content': 0.12283417582511902, 'timestamp': '2025-09-30 22:19:11.235798', 'step': 5912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:11.267709', 'step': 5912, 'epoch': 1} {'type': 'loss', 'content': 0.12736767530441284, 'timestamp': '2025-09-30 22:19:11.270449', 'step': 5913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.303596', 'step': 5913, 'epoch': 1} {'type': 'loss', 'content': 0.30486375093460083, 'timestamp': '2025-09-30 22:19:11.312024', 'step': 5914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:11.346477', 'step': 5914, 'epoch': 1} {'type': 'loss', 'content': 0.20750312507152557, 'timestamp': '2025-09-30 22:19:11.354085', 'step': 5915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:11.388458', 'step': 5915, 'epoch': 1} {'type': 'loss', 'content': 0.11206851899623871, 'timestamp': '2025-09-30 22:19:11.418921', 'step': 5916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:11.449780', 'step': 5916, 'epoch': 1} {'type': 'loss', 'content': 0.184249609708786, 'timestamp': '2025-09-30 22:19:11.452410', 'step': 5917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:11.483119', 'step': 5917, 'epoch': 1} {'type': 'loss', 'content': 0.12805546820163727, 'timestamp': '2025-09-30 22:19:11.486332', 'step': 5918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.518281', 'step': 5918, 'epoch': 1} {'type': 'loss', 'content': 0.2006022036075592, 'timestamp': '2025-09-30 22:19:11.521248', 'step': 5919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.564530', 'step': 5919, 'epoch': 1} {'type': 'loss', 'content': 0.17309384047985077, 'timestamp': '2025-09-30 22:19:11.590604', 'step': 5920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.620702', 'step': 5920, 'epoch': 1} {'type': 'loss', 'content': 0.14910005033016205, 'timestamp': '2025-09-30 22:19:11.623685', 'step': 5921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:11.654327', 'step': 5921, 'epoch': 1} {'type': 'loss', 'content': 0.2061365693807602, 'timestamp': '2025-09-30 22:19:11.661823', 'step': 5922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.697331', 'step': 5922, 'epoch': 1} {'type': 'loss', 'content': 0.2301369458436966, 'timestamp': '2025-09-30 22:19:11.699684', 'step': 5923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:11.730231', 'step': 5923, 'epoch': 1} {'type': 'loss', 'content': 0.13762621581554413, 'timestamp': '2025-09-30 22:19:11.757476', 'step': 5924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:11.787466', 'step': 5924, 'epoch': 1} {'type': 'loss', 'content': 0.09364277869462967, 'timestamp': '2025-09-30 22:19:11.796280', 'step': 5925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:11.832152', 'step': 5925, 'epoch': 1} {'type': 'loss', 'content': 0.13019591569900513, 'timestamp': '2025-09-30 22:19:11.837855', 'step': 5926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.873332', 'step': 5926, 'epoch': 1} {'type': 'loss', 'content': 0.07665170729160309, 'timestamp': '2025-09-30 22:19:11.877119', 'step': 5927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.911966', 'step': 5927, 'epoch': 1} {'type': 'loss', 'content': 0.07148122787475586, 'timestamp': '2025-09-30 22:19:11.940404', 'step': 5928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:11.975242', 'step': 5928, 'epoch': 1} {'type': 'loss', 'content': 0.09711264818906784, 'timestamp': '2025-09-30 22:19:11.981007', 'step': 5929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.015061', 'step': 5929, 'epoch': 1} {'type': 'loss', 'content': 0.051232606172561646, 'timestamp': '2025-09-30 22:19:12.018061', 'step': 5930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.053025', 'step': 5930, 'epoch': 1} {'type': 'loss', 'content': 0.15125377476215363, 'timestamp': '2025-09-30 22:19:12.058979', 'step': 5931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.089626', 'step': 5931, 'epoch': 1} {'type': 'loss', 'content': 0.16422179341316223, 'timestamp': '2025-09-30 22:19:12.113652', 'step': 5932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:12.144353', 'step': 5932, 'epoch': 1} {'type': 'loss', 'content': 0.24277490377426147, 'timestamp': '2025-09-30 22:19:12.146689', 'step': 5933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.182688', 'step': 5933, 'epoch': 1} {'type': 'loss', 'content': 0.11999543756246567, 'timestamp': '2025-09-30 22:19:12.185521', 'step': 5934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:12.220077', 'step': 5934, 'epoch': 1} {'type': 'loss', 'content': 0.16470372676849365, 'timestamp': '2025-09-30 22:19:12.223046', 'step': 5935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:12.270468', 'step': 5935, 'epoch': 1} {'type': 'loss', 'content': 0.14917606115341187, 'timestamp': '2025-09-30 22:19:12.297802', 'step': 5936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:12.328290', 'step': 5936, 'epoch': 1} {'type': 'loss', 'content': 0.09686567634344101, 'timestamp': '2025-09-30 22:19:12.330569', 'step': 5937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:12.365294', 'step': 5937, 'epoch': 1} {'type': 'loss', 'content': 0.1695772260427475, 'timestamp': '2025-09-30 22:19:12.370543', 'step': 5938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:12.403070', 'step': 5938, 'epoch': 1} {'type': 'loss', 'content': 0.15996891260147095, 'timestamp': '2025-09-30 22:19:12.409954', 'step': 5939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.442936', 'step': 5939, 'epoch': 1} {'type': 'loss', 'content': 0.34997645020484924, 'timestamp': '2025-09-30 22:19:12.469604', 'step': 5940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.499232', 'step': 5940, 'epoch': 1} {'type': 'loss', 'content': 0.17966899275779724, 'timestamp': '2025-09-30 22:19:12.505007', 'step': 5941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:12.535696', 'step': 5941, 'epoch': 1} {'type': 'loss', 'content': 0.1621144860982895, 'timestamp': '2025-09-30 22:19:12.540462', 'step': 5942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:12.572628', 'step': 5942, 'epoch': 1} {'type': 'loss', 'content': 0.08173570036888123, 'timestamp': '2025-09-30 22:19:12.578816', 'step': 5943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:12.611750', 'step': 5943, 'epoch': 1} {'type': 'loss', 'content': 0.15847660601139069, 'timestamp': '2025-09-30 22:19:12.638476', 'step': 5944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:12.668913', 'step': 5944, 'epoch': 1} {'type': 'loss', 'content': 0.2511477470397949, 'timestamp': '2025-09-30 22:19:12.672270', 'step': 5945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:12.706401', 'step': 5945, 'epoch': 1} {'type': 'loss', 'content': 0.11694946140050888, 'timestamp': '2025-09-30 22:19:12.711110', 'step': 5946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:12.744561', 'step': 5946, 'epoch': 1} {'type': 'loss', 'content': 0.13883529603481293, 'timestamp': '2025-09-30 22:19:12.748772', 'step': 5947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.781009', 'step': 5947, 'epoch': 1} {'type': 'loss', 'content': 0.18726733326911926, 'timestamp': '2025-09-30 22:19:12.805224', 'step': 5948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:12.837920', 'step': 5948, 'epoch': 1} {'type': 'loss', 'content': 0.1838354915380478, 'timestamp': '2025-09-30 22:19:12.840184', 'step': 5949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:12.870135', 'step': 5949, 'epoch': 1} {'type': 'loss', 'content': 0.1821635663509369, 'timestamp': '2025-09-30 22:19:12.874016', 'step': 5950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:12.907466', 'step': 5950, 'epoch': 1} {'type': 'loss', 'content': 0.16244009137153625, 'timestamp': '2025-09-30 22:19:12.913320', 'step': 5951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:12.944266', 'step': 5951, 'epoch': 1} {'type': 'loss', 'content': 0.15157996118068695, 'timestamp': '2025-09-30 22:19:12.970704', 'step': 5952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:13.003151', 'step': 5952, 'epoch': 1} {'type': 'loss', 'content': 0.16328690946102142, 'timestamp': '2025-09-30 22:19:13.007044', 'step': 5953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.039501', 'step': 5953, 'epoch': 1} {'type': 'loss', 'content': 0.1040952056646347, 'timestamp': '2025-09-30 22:19:13.041954', 'step': 5954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:13.073606', 'step': 5954, 'epoch': 1} {'type': 'loss', 'content': 0.11182253807783127, 'timestamp': '2025-09-30 22:19:13.077792', 'step': 5955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.108882', 'step': 5955, 'epoch': 1} {'type': 'loss', 'content': 0.11370886862277985, 'timestamp': '2025-09-30 22:19:13.139958', 'step': 5956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.170560', 'step': 5956, 'epoch': 1} {'type': 'loss', 'content': 0.13234315812587738, 'timestamp': '2025-09-30 22:19:13.174820', 'step': 5957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.207100', 'step': 5957, 'epoch': 1} {'type': 'loss', 'content': 0.179655522108078, 'timestamp': '2025-09-30 22:19:13.211822', 'step': 5958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.244804', 'step': 5958, 'epoch': 1} {'type': 'loss', 'content': 0.20787625014781952, 'timestamp': '2025-09-30 22:19:13.249911', 'step': 5959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.280612', 'step': 5959, 'epoch': 1} {'type': 'loss', 'content': 0.08119126409292221, 'timestamp': '2025-09-30 22:19:13.304573', 'step': 5960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.335769', 'step': 5960, 'epoch': 1} {'type': 'loss', 'content': 0.19645148515701294, 'timestamp': '2025-09-30 22:19:13.339732', 'step': 5961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:13.370531', 'step': 5961, 'epoch': 1} {'type': 'loss', 'content': 0.12959574162960052, 'timestamp': '2025-09-30 22:19:13.376141', 'step': 5962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:13.409255', 'step': 5962, 'epoch': 1} {'type': 'loss', 'content': 0.09375916421413422, 'timestamp': '2025-09-30 22:19:13.415859', 'step': 5963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.446374', 'step': 5963, 'epoch': 1} {'type': 'loss', 'content': 0.08471225202083588, 'timestamp': '2025-09-30 22:19:13.473208', 'step': 5964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.503353', 'step': 5964, 'epoch': 1} {'type': 'loss', 'content': 0.11035370081663132, 'timestamp': '2025-09-30 22:19:13.505810', 'step': 5965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.535874', 'step': 5965, 'epoch': 1} {'type': 'loss', 'content': 0.12070455402135849, 'timestamp': '2025-09-30 22:19:13.549583', 'step': 5966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:13.582584', 'step': 5966, 'epoch': 1} {'type': 'loss', 'content': 0.10707264393568039, 'timestamp': '2025-09-30 22:19:13.588397', 'step': 5967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:13.621484', 'step': 5967, 'epoch': 1} {'type': 'loss', 'content': 0.13178935647010803, 'timestamp': '2025-09-30 22:19:13.647768', 'step': 5968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:13.680603', 'step': 5968, 'epoch': 1} {'type': 'loss', 'content': 0.0920381173491478, 'timestamp': '2025-09-30 22:19:13.687329', 'step': 5969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.720588', 'step': 5969, 'epoch': 1} {'type': 'loss', 'content': 0.1481540948152542, 'timestamp': '2025-09-30 22:19:13.725680', 'step': 5970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:13.759101', 'step': 5970, 'epoch': 1} {'type': 'loss', 'content': 0.11388048529624939, 'timestamp': '2025-09-30 22:19:13.762279', 'step': 5971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:19:13.796778', 'step': 5971, 'epoch': 1} {'type': 'loss', 'content': 0.11350773274898529, 'timestamp': '2025-09-30 22:19:13.824854', 'step': 5972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:13.859769', 'step': 5972, 'epoch': 1} {'type': 'loss', 'content': 0.1615421175956726, 'timestamp': '2025-09-30 22:19:13.862796', 'step': 5973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.893762', 'step': 5973, 'epoch': 1} {'type': 'loss', 'content': 0.113840252161026, 'timestamp': '2025-09-30 22:19:13.899562', 'step': 5974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.940708', 'step': 5974, 'epoch': 1} {'type': 'loss', 'content': 0.22867034375667572, 'timestamp': '2025-09-30 22:19:13.953778', 'step': 5975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:13.988157', 'step': 5975, 'epoch': 1} {'type': 'loss', 'content': 0.13863374292850494, 'timestamp': '2025-09-30 22:19:14.013138', 'step': 5976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.043095', 'step': 5976, 'epoch': 1} {'type': 'loss', 'content': 0.1253250390291214, 'timestamp': '2025-09-30 22:19:14.047576', 'step': 5977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:14.080739', 'step': 5977, 'epoch': 1} {'type': 'loss', 'content': 0.12364109605550766, 'timestamp': '2025-09-30 22:19:14.087831', 'step': 5978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:14.120987', 'step': 5978, 'epoch': 1} {'type': 'loss', 'content': 0.2224862426519394, 'timestamp': '2025-09-30 22:19:14.127753', 'step': 5979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:19:14.161619', 'step': 5979, 'epoch': 1} {'type': 'loss', 'content': 0.14150479435920715, 'timestamp': '2025-09-30 22:19:14.187291', 'step': 5980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:14.218578', 'step': 5980, 'epoch': 1} {'type': 'loss', 'content': 0.1715238094329834, 'timestamp': '2025-09-30 22:19:14.224571', 'step': 5981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.259690', 'step': 5981, 'epoch': 1} {'type': 'loss', 'content': 0.16233058273792267, 'timestamp': '2025-09-30 22:19:14.262382', 'step': 5982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:14.292935', 'step': 5982, 'epoch': 1} {'type': 'loss', 'content': 0.1867668628692627, 'timestamp': '2025-09-30 22:19:14.295767', 'step': 5983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:14.330070', 'step': 5983, 'epoch': 1} {'type': 'loss', 'content': 0.27179309725761414, 'timestamp': '2025-09-30 22:19:14.353920', 'step': 5984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.384422', 'step': 5984, 'epoch': 1} {'type': 'loss', 'content': 0.14155271649360657, 'timestamp': '2025-09-30 22:19:14.391782', 'step': 5985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:14.423249', 'step': 5985, 'epoch': 1} {'type': 'loss', 'content': 0.0965450331568718, 'timestamp': '2025-09-30 22:19:14.425765', 'step': 5986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.459884', 'step': 5986, 'epoch': 1} {'type': 'loss', 'content': 0.13135898113250732, 'timestamp': '2025-09-30 22:19:14.462409', 'step': 5987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:14.492343', 'step': 5987, 'epoch': 1} {'type': 'loss', 'content': 0.1618306040763855, 'timestamp': '2025-09-30 22:19:14.525599', 'step': 5988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:14.555622', 'step': 5988, 'epoch': 1} {'type': 'loss', 'content': 0.10230202972888947, 'timestamp': '2025-09-30 22:19:14.563450', 'step': 5989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.593343', 'step': 5989, 'epoch': 1} {'type': 'loss', 'content': 0.19104069471359253, 'timestamp': '2025-09-30 22:19:14.596021', 'step': 5990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.630395', 'step': 5990, 'epoch': 1} {'type': 'loss', 'content': 0.11484192311763763, 'timestamp': '2025-09-30 22:19:14.634119', 'step': 5991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.665293', 'step': 5991, 'epoch': 1} {'type': 'loss', 'content': 0.08452702313661575, 'timestamp': '2025-09-30 22:19:14.691306', 'step': 5992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:14.722273', 'step': 5992, 'epoch': 1} {'type': 'loss', 'content': 0.19923602044582367, 'timestamp': '2025-09-30 22:19:14.729633', 'step': 5993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.761705', 'step': 5993, 'epoch': 1} {'type': 'loss', 'content': 0.1926940381526947, 'timestamp': '2025-09-30 22:19:14.764866', 'step': 5994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:14.795543', 'step': 5994, 'epoch': 1} {'type': 'loss', 'content': 0.24487720429897308, 'timestamp': '2025-09-30 22:19:14.797953', 'step': 5995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:14.836942', 'step': 5995, 'epoch': 1} {'type': 'loss', 'content': 0.14925262331962585, 'timestamp': '2025-09-30 22:19:14.869775', 'step': 5996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:14.900447', 'step': 5996, 'epoch': 1} {'type': 'loss', 'content': 0.1435888558626175, 'timestamp': '2025-09-30 22:19:14.907594', 'step': 5997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:14.938605', 'step': 5997, 'epoch': 1} {'type': 'loss', 'content': 0.14869356155395508, 'timestamp': '2025-09-30 22:19:14.947159', 'step': 5998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:14.983759', 'step': 5998, 'epoch': 1} {'type': 'loss', 'content': 0.22369232773780823, 'timestamp': '2025-09-30 22:19:14.985958', 'step': 5999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:15.022700', 'step': 5999, 'epoch': 1} {'type': 'loss', 'content': 0.23823300004005432, 'timestamp': '2025-09-30 22:19:15.052319', 'step': 6000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6000', 'timestamp': '2025-09-30 22:19:20.409470', 'step': 6000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:20.442487', 'step': 6000, 'epoch': 1} {'type': 'loss', 'content': 0.15022693574428558, 'timestamp': '2025-09-30 22:19:20.450769', 'step': 6001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:20.487068', 'step': 6001, 'epoch': 1} {'type': 'loss', 'content': 0.21808138489723206, 'timestamp': '2025-09-30 22:19:20.490313', 'step': 6002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:20.522489', 'step': 6002, 'epoch': 1} {'type': 'loss', 'content': 0.17299814522266388, 'timestamp': '2025-09-30 22:19:20.526509', 'step': 6003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:20.563453', 'step': 6003, 'epoch': 1} {'type': 'loss', 'content': 0.118643619120121, 'timestamp': '2025-09-30 22:19:20.588888', 'step': 6004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:20.618941', 'step': 6004, 'epoch': 1} {'type': 'loss', 'content': 0.14407844841480255, 'timestamp': '2025-09-30 22:19:20.630659', 'step': 6005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:20.675881', 'step': 6005, 'epoch': 1} {'type': 'loss', 'content': 0.0815914049744606, 'timestamp': '2025-09-30 22:19:20.679232', 'step': 6006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:20.711686', 'step': 6006, 'epoch': 1} {'type': 'loss', 'content': 0.2252771407365799, 'timestamp': '2025-09-30 22:19:20.715668', 'step': 6007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:20.747658', 'step': 6007, 'epoch': 1} {'type': 'loss', 'content': 0.09116274118423462, 'timestamp': '2025-09-30 22:19:20.778600', 'step': 6008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:20.813710', 'step': 6008, 'epoch': 1} {'type': 'loss', 'content': 0.11280400305986404, 'timestamp': '2025-09-30 22:19:20.817501', 'step': 6009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:20.848678', 'step': 6009, 'epoch': 1} {'type': 'loss', 'content': 0.13626615703105927, 'timestamp': '2025-09-30 22:19:20.852224', 'step': 6010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:20.883838', 'step': 6010, 'epoch': 1} {'type': 'loss', 'content': 0.20560358464717865, 'timestamp': '2025-09-30 22:19:20.896599', 'step': 6011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:20.934338', 'step': 6011, 'epoch': 1} {'type': 'loss', 'content': 0.11651245504617691, 'timestamp': '2025-09-30 22:19:20.962283', 'step': 6012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:21.000105', 'step': 6012, 'epoch': 1} {'type': 'loss', 'content': 0.1943349838256836, 'timestamp': '2025-09-30 22:19:21.003051', 'step': 6013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.040900', 'step': 6013, 'epoch': 1} {'type': 'loss', 'content': 0.1767643243074417, 'timestamp': '2025-09-30 22:19:21.044916', 'step': 6014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:19:21.079825', 'step': 6014, 'epoch': 1} {'type': 'loss', 'content': 0.10792044550180435, 'timestamp': '2025-09-30 22:19:21.084224', 'step': 6015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.116830', 'step': 6015, 'epoch': 1} {'type': 'loss', 'content': 0.1815039962530136, 'timestamp': '2025-09-30 22:19:21.141174', 'step': 6016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.172275', 'step': 6016, 'epoch': 1} {'type': 'loss', 'content': 0.14173148572444916, 'timestamp': '2025-09-30 22:19:21.175026', 'step': 6017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.214531', 'step': 6017, 'epoch': 1} {'type': 'loss', 'content': 0.09613806754350662, 'timestamp': '2025-09-30 22:19:21.223164', 'step': 6018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.253635', 'step': 6018, 'epoch': 1} {'type': 'loss', 'content': 0.1315346211194992, 'timestamp': '2025-09-30 22:19:21.260907', 'step': 6019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.292668', 'step': 6019, 'epoch': 1} {'type': 'loss', 'content': 0.24159061908721924, 'timestamp': '2025-09-30 22:19:21.317602', 'step': 6020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.349186', 'step': 6020, 'epoch': 1} {'type': 'loss', 'content': 0.19846118986606598, 'timestamp': '2025-09-30 22:19:21.358309', 'step': 6021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.391985', 'step': 6021, 'epoch': 1} {'type': 'loss', 'content': 0.08900639414787292, 'timestamp': '2025-09-30 22:19:21.402056', 'step': 6022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.441450', 'step': 6022, 'epoch': 1} {'type': 'loss', 'content': 0.12302186340093613, 'timestamp': '2025-09-30 22:19:21.453342', 'step': 6023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.488418', 'step': 6023, 'epoch': 1} {'type': 'loss', 'content': 0.13562506437301636, 'timestamp': '2025-09-30 22:19:21.512743', 'step': 6024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.550835', 'step': 6024, 'epoch': 1} {'type': 'loss', 'content': 0.11756569147109985, 'timestamp': '2025-09-30 22:19:21.553919', 'step': 6025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.590930', 'step': 6025, 'epoch': 1} {'type': 'loss', 'content': 0.12612241506576538, 'timestamp': '2025-09-30 22:19:21.593389', 'step': 6026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.630527', 'step': 6026, 'epoch': 1} {'type': 'loss', 'content': 0.0889684185385704, 'timestamp': '2025-09-30 22:19:21.643449', 'step': 6027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.685779', 'step': 6027, 'epoch': 1} {'type': 'loss', 'content': 0.15275512635707855, 'timestamp': '2025-09-30 22:19:21.710717', 'step': 6028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.743915', 'step': 6028, 'epoch': 1} {'type': 'loss', 'content': 0.16741515696048737, 'timestamp': '2025-09-30 22:19:21.755143', 'step': 6029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:21.786766', 'step': 6029, 'epoch': 1} {'type': 'loss', 'content': 0.15895910561084747, 'timestamp': '2025-09-30 22:19:21.790789', 'step': 6030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.826891', 'step': 6030, 'epoch': 1} {'type': 'loss', 'content': 0.08385660499334335, 'timestamp': '2025-09-30 22:19:21.830287', 'step': 6031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:21.872337', 'step': 6031, 'epoch': 1} {'type': 'loss', 'content': 0.15246513485908508, 'timestamp': '2025-09-30 22:19:21.905169', 'step': 6032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.945799', 'step': 6032, 'epoch': 1} {'type': 'loss', 'content': 0.16043449938297272, 'timestamp': '2025-09-30 22:19:21.958424', 'step': 6033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:21.999271', 'step': 6033, 'epoch': 1} {'type': 'loss', 'content': 0.16892392933368683, 'timestamp': '2025-09-30 22:19:22.002491', 'step': 6034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:22.035528', 'step': 6034, 'epoch': 1} {'type': 'loss', 'content': 0.22650057077407837, 'timestamp': '2025-09-30 22:19:22.038667', 'step': 6035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.081849', 'step': 6035, 'epoch': 1} {'type': 'loss', 'content': 0.25053560733795166, 'timestamp': '2025-09-30 22:19:22.106605', 'step': 6036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:22.146836', 'step': 6036, 'epoch': 1} {'type': 'loss', 'content': 0.161648690700531, 'timestamp': '2025-09-30 22:19:22.149903', 'step': 6037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:22.182827', 'step': 6037, 'epoch': 1} {'type': 'loss', 'content': 0.1414429396390915, 'timestamp': '2025-09-30 22:19:22.185685', 'step': 6038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.216522', 'step': 6038, 'epoch': 1} {'type': 'loss', 'content': 0.1641954481601715, 'timestamp': '2025-09-30 22:19:22.227638', 'step': 6039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.267413', 'step': 6039, 'epoch': 1} {'type': 'loss', 'content': 0.09746526926755905, 'timestamp': '2025-09-30 22:19:22.292621', 'step': 6040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:22.322837', 'step': 6040, 'epoch': 1} {'type': 'loss', 'content': 0.16257213056087494, 'timestamp': '2025-09-30 22:19:22.326025', 'step': 6041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.365783', 'step': 6041, 'epoch': 1} {'type': 'loss', 'content': 0.17111922800540924, 'timestamp': '2025-09-30 22:19:22.378095', 'step': 6042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:22.409531', 'step': 6042, 'epoch': 1} {'type': 'loss', 'content': 0.19786371290683746, 'timestamp': '2025-09-30 22:19:22.413383', 'step': 6043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.455791', 'step': 6043, 'epoch': 1} {'type': 'loss', 'content': 0.09778612852096558, 'timestamp': '2025-09-30 22:19:22.481455', 'step': 6044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:22.525448', 'step': 6044, 'epoch': 1} {'type': 'loss', 'content': 0.13189274072647095, 'timestamp': '2025-09-30 22:19:22.529782', 'step': 6045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:22.561472', 'step': 6045, 'epoch': 1} {'type': 'loss', 'content': 0.13698434829711914, 'timestamp': '2025-09-30 22:19:22.565225', 'step': 6046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.596044', 'step': 6046, 'epoch': 1} {'type': 'loss', 'content': 0.087801992893219, 'timestamp': '2025-09-30 22:19:22.609686', 'step': 6047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.640650', 'step': 6047, 'epoch': 1} {'type': 'loss', 'content': 0.12173552066087723, 'timestamp': '2025-09-30 22:19:22.665927', 'step': 6048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.697232', 'step': 6048, 'epoch': 1} {'type': 'loss', 'content': 0.07700596004724503, 'timestamp': '2025-09-30 22:19:22.700885', 'step': 6049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:22.733183', 'step': 6049, 'epoch': 1} {'type': 'loss', 'content': 0.158580020070076, 'timestamp': '2025-09-30 22:19:22.737117', 'step': 6050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:22.779944', 'step': 6050, 'epoch': 1} {'type': 'loss', 'content': 0.23429448902606964, 'timestamp': '2025-09-30 22:19:22.783678', 'step': 6051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:22.824258', 'step': 6051, 'epoch': 1} {'type': 'loss', 'content': 0.1257070153951645, 'timestamp': '2025-09-30 22:19:22.857987', 'step': 6052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:22.899690', 'step': 6052, 'epoch': 1} {'type': 'loss', 'content': 0.0959065705537796, 'timestamp': '2025-09-30 22:19:22.904168', 'step': 6053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:22.936243', 'step': 6053, 'epoch': 1} {'type': 'loss', 'content': 0.09525305032730103, 'timestamp': '2025-09-30 22:19:22.939411', 'step': 6054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:22.978336', 'step': 6054, 'epoch': 1} {'type': 'loss', 'content': 0.1635916382074356, 'timestamp': '2025-09-30 22:19:22.981479', 'step': 6055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:23.020851', 'step': 6055, 'epoch': 1} {'type': 'loss', 'content': 0.18553173542022705, 'timestamp': '2025-09-30 22:19:23.055122', 'step': 6056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:23.096284', 'step': 6056, 'epoch': 1} {'type': 'loss', 'content': 0.09496388584375381, 'timestamp': '2025-09-30 22:19:23.100746', 'step': 6057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:23.143259', 'step': 6057, 'epoch': 1} {'type': 'loss', 'content': 0.16265751421451569, 'timestamp': '2025-09-30 22:19:23.147419', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:19:31.518115', 'step': 6058, 'epoch': 1} {'type': 'pplx', 'content': 8271.472798569665, 'timestamp': '2025-09-30 22:19:31.525054', 'step': 6058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:31.559118', 'step': 6058, 'epoch': 1} {'type': 'loss', 'content': 0.3210359811782837, 'timestamp': '2025-09-30 22:19:31.565091', 'step': 6059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:31.598710', 'step': 6059, 'epoch': 1} {'type': 'loss', 'content': 0.1492139846086502, 'timestamp': '2025-09-30 22:19:31.624357', 'step': 6060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:31.671623', 'step': 6060, 'epoch': 1} {'type': 'loss', 'content': 0.15493465960025787, 'timestamp': '2025-09-30 22:19:31.675078', 'step': 6061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:31.732280', 'step': 6061, 'epoch': 1} {'type': 'loss', 'content': 0.19255661964416504, 'timestamp': '2025-09-30 22:19:31.736509', 'step': 6062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:31.775775', 'step': 6062, 'epoch': 1} {'type': 'loss', 'content': 0.06334953755140305, 'timestamp': '2025-09-30 22:19:31.786183', 'step': 6063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:31.817716', 'step': 6063, 'epoch': 1} {'type': 'loss', 'content': 0.09436717629432678, 'timestamp': '2025-09-30 22:19:31.845540', 'step': 6064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:31.880046', 'step': 6064, 'epoch': 1} {'type': 'loss', 'content': 0.20425283908843994, 'timestamp': '2025-09-30 22:19:31.882566', 'step': 6065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:31.913730', 'step': 6065, 'epoch': 1} {'type': 'loss', 'content': 0.08719416707754135, 'timestamp': '2025-09-30 22:19:31.919712', 'step': 6066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:31.954416', 'step': 6066, 'epoch': 1} {'type': 'loss', 'content': 0.1772630214691162, 'timestamp': '2025-09-30 22:19:31.956974', 'step': 6067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:31.993347', 'step': 6067, 'epoch': 1} {'type': 'loss', 'content': 0.11506043374538422, 'timestamp': '2025-09-30 22:19:32.024023', 'step': 6068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:32.054648', 'step': 6068, 'epoch': 1} {'type': 'loss', 'content': 0.19888174533843994, 'timestamp': '2025-09-30 22:19:32.057250', 'step': 6069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:32.092277', 'step': 6069, 'epoch': 1} {'type': 'loss', 'content': 0.11889045685529709, 'timestamp': '2025-09-30 22:19:32.098458', 'step': 6070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:32.135158', 'step': 6070, 'epoch': 1} {'type': 'loss', 'content': 0.23195283114910126, 'timestamp': '2025-09-30 22:19:32.143374', 'step': 6071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.175655', 'step': 6071, 'epoch': 1} {'type': 'loss', 'content': 0.11689376085996628, 'timestamp': '2025-09-30 22:19:32.200457', 'step': 6072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:32.235758', 'step': 6072, 'epoch': 1} {'type': 'loss', 'content': 0.12236299365758896, 'timestamp': '2025-09-30 22:19:32.243135', 'step': 6073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.282112', 'step': 6073, 'epoch': 1} {'type': 'loss', 'content': 0.13168928027153015, 'timestamp': '2025-09-30 22:19:32.284959', 'step': 6074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.320783', 'step': 6074, 'epoch': 1} {'type': 'loss', 'content': 0.14974431693553925, 'timestamp': '2025-09-30 22:19:32.323632', 'step': 6075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:32.360768', 'step': 6075, 'epoch': 1} {'type': 'loss', 'content': 0.20862777531147003, 'timestamp': '2025-09-30 22:19:32.385263', 'step': 6076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:32.416367', 'step': 6076, 'epoch': 1} {'type': 'loss', 'content': 0.07723034173250198, 'timestamp': '2025-09-30 22:19:32.423058', 'step': 6077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.459700', 'step': 6077, 'epoch': 1} {'type': 'loss', 'content': 0.10534071177244186, 'timestamp': '2025-09-30 22:19:32.462219', 'step': 6078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.493764', 'step': 6078, 'epoch': 1} {'type': 'loss', 'content': 0.13670876622200012, 'timestamp': '2025-09-30 22:19:32.500118', 'step': 6079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.535632', 'step': 6079, 'epoch': 1} {'type': 'loss', 'content': 0.1851091980934143, 'timestamp': '2025-09-30 22:19:32.564801', 'step': 6080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:32.598295', 'step': 6080, 'epoch': 1} {'type': 'loss', 'content': 0.20544613897800446, 'timestamp': '2025-09-30 22:19:32.601688', 'step': 6081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:32.633694', 'step': 6081, 'epoch': 1} {'type': 'loss', 'content': 0.09461990743875504, 'timestamp': '2025-09-30 22:19:32.641102', 'step': 6082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:32.672272', 'step': 6082, 'epoch': 1} {'type': 'loss', 'content': 0.15289051830768585, 'timestamp': '2025-09-30 22:19:32.675626', 'step': 6083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.706482', 'step': 6083, 'epoch': 1} {'type': 'loss', 'content': 0.12401057779788971, 'timestamp': '2025-09-30 22:19:32.731474', 'step': 6084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:32.773937', 'step': 6084, 'epoch': 1} {'type': 'loss', 'content': 0.1979825645685196, 'timestamp': '2025-09-30 22:19:32.780029', 'step': 6085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.816175', 'step': 6085, 'epoch': 1} {'type': 'loss', 'content': 0.19340850412845612, 'timestamp': '2025-09-30 22:19:32.818975', 'step': 6086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:32.855059', 'step': 6086, 'epoch': 1} {'type': 'loss', 'content': 0.29786086082458496, 'timestamp': '2025-09-30 22:19:32.858495', 'step': 6087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:32.894463', 'step': 6087, 'epoch': 1} {'type': 'loss', 'content': 0.21081067621707916, 'timestamp': '2025-09-30 22:19:32.919502', 'step': 6088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.959768', 'step': 6088, 'epoch': 1} {'type': 'loss', 'content': 0.0744837298989296, 'timestamp': '2025-09-30 22:19:32.962615', 'step': 6089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:32.993702', 'step': 6089, 'epoch': 1} {'type': 'loss', 'content': 0.13889510929584503, 'timestamp': '2025-09-30 22:19:32.996703', 'step': 6090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:33.034124', 'step': 6090, 'epoch': 1} {'type': 'loss', 'content': 0.12822985649108887, 'timestamp': '2025-09-30 22:19:33.040865', 'step': 6091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:33.076609', 'step': 6091, 'epoch': 1} {'type': 'loss', 'content': 0.20748820900917053, 'timestamp': '2025-09-30 22:19:33.105211', 'step': 6092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.143746', 'step': 6092, 'epoch': 1} {'type': 'loss', 'content': 0.11282113939523697, 'timestamp': '2025-09-30 22:19:33.146349', 'step': 6093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.185636', 'step': 6093, 'epoch': 1} {'type': 'loss', 'content': 0.11471375077962875, 'timestamp': '2025-09-30 22:19:33.188869', 'step': 6094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.225935', 'step': 6094, 'epoch': 1} {'type': 'loss', 'content': 0.23924940824508667, 'timestamp': '2025-09-30 22:19:33.234773', 'step': 6095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.266993', 'step': 6095, 'epoch': 1} {'type': 'loss', 'content': 0.22508780658245087, 'timestamp': '2025-09-30 22:19:33.292196', 'step': 6096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:33.323819', 'step': 6096, 'epoch': 1} {'type': 'loss', 'content': 0.13300134241580963, 'timestamp': '2025-09-30 22:19:33.331470', 'step': 6097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.362872', 'step': 6097, 'epoch': 1} {'type': 'loss', 'content': 0.12399083375930786, 'timestamp': '2025-09-30 22:19:33.369806', 'step': 6098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:33.401627', 'step': 6098, 'epoch': 1} {'type': 'loss', 'content': 0.14026664197444916, 'timestamp': '2025-09-30 22:19:33.404179', 'step': 6099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:33.439441', 'step': 6099, 'epoch': 1} {'type': 'loss', 'content': 0.1160397082567215, 'timestamp': '2025-09-30 22:19:33.468445', 'step': 6100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:33.498541', 'step': 6100, 'epoch': 1} {'type': 'loss', 'content': 0.10566042363643646, 'timestamp': '2025-09-30 22:19:33.502559', 'step': 6101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.536621', 'step': 6101, 'epoch': 1} {'type': 'loss', 'content': 0.11840779334306717, 'timestamp': '2025-09-30 22:19:33.543179', 'step': 6102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.577299', 'step': 6102, 'epoch': 1} {'type': 'loss', 'content': 0.14610488712787628, 'timestamp': '2025-09-30 22:19:33.583243', 'step': 6103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:33.617946', 'step': 6103, 'epoch': 1} {'type': 'loss', 'content': 0.18091094493865967, 'timestamp': '2025-09-30 22:19:33.642840', 'step': 6104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:33.682604', 'step': 6104, 'epoch': 1} {'type': 'loss', 'content': 0.15124009549617767, 'timestamp': '2025-09-30 22:19:33.695609', 'step': 6105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:33.728744', 'step': 6105, 'epoch': 1} {'type': 'loss', 'content': 0.1367507129907608, 'timestamp': '2025-09-30 22:19:33.735591', 'step': 6106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:33.766506', 'step': 6106, 'epoch': 1} {'type': 'loss', 'content': 0.20876947045326233, 'timestamp': '2025-09-30 22:19:33.769133', 'step': 6107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:33.806900', 'step': 6107, 'epoch': 1} {'type': 'loss', 'content': 0.18199779093265533, 'timestamp': '2025-09-30 22:19:33.832705', 'step': 6108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:33.864953', 'step': 6108, 'epoch': 1} {'type': 'loss', 'content': 0.18704618513584137, 'timestamp': '2025-09-30 22:19:33.871956', 'step': 6109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.911164', 'step': 6109, 'epoch': 1} {'type': 'loss', 'content': 0.13273541629314423, 'timestamp': '2025-09-30 22:19:33.919774', 'step': 6110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:33.953268', 'step': 6110, 'epoch': 1} {'type': 'loss', 'content': 0.16789083182811737, 'timestamp': '2025-09-30 22:19:33.959062', 'step': 6111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:33.991889', 'step': 6111, 'epoch': 1} {'type': 'loss', 'content': 0.15599872171878815, 'timestamp': '2025-09-30 22:19:34.019928', 'step': 6112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.054029', 'step': 6112, 'epoch': 1} {'type': 'loss', 'content': 0.14690423011779785, 'timestamp': '2025-09-30 22:19:34.060296', 'step': 6113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.092827', 'step': 6113, 'epoch': 1} {'type': 'loss', 'content': 0.13456442952156067, 'timestamp': '2025-09-30 22:19:34.102636', 'step': 6114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:34.138863', 'step': 6114, 'epoch': 1} {'type': 'loss', 'content': 0.10790404677391052, 'timestamp': '2025-09-30 22:19:34.142917', 'step': 6115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.176625', 'step': 6115, 'epoch': 1} {'type': 'loss', 'content': 0.17005950212478638, 'timestamp': '2025-09-30 22:19:34.204250', 'step': 6116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:34.234527', 'step': 6116, 'epoch': 1} {'type': 'loss', 'content': 0.16188432276248932, 'timestamp': '2025-09-30 22:19:34.241581', 'step': 6117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:34.274162', 'step': 6117, 'epoch': 1} {'type': 'loss', 'content': 0.12156116217374802, 'timestamp': '2025-09-30 22:19:34.279855', 'step': 6118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:34.312998', 'step': 6118, 'epoch': 1} {'type': 'loss', 'content': 0.18078891932964325, 'timestamp': '2025-09-30 22:19:34.318669', 'step': 6119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:34.351977', 'step': 6119, 'epoch': 1} {'type': 'loss', 'content': 0.20164798200130463, 'timestamp': '2025-09-30 22:19:34.376601', 'step': 6120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:34.410639', 'step': 6120, 'epoch': 1} {'type': 'loss', 'content': 0.09564746916294098, 'timestamp': '2025-09-30 22:19:34.418267', 'step': 6121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:34.450860', 'step': 6121, 'epoch': 1} {'type': 'loss', 'content': 0.19159330427646637, 'timestamp': '2025-09-30 22:19:34.456704', 'step': 6122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.491602', 'step': 6122, 'epoch': 1} {'type': 'loss', 'content': 0.20211757719516754, 'timestamp': '2025-09-30 22:19:34.494009', 'step': 6123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.526855', 'step': 6123, 'epoch': 1} {'type': 'loss', 'content': 0.09554021060466766, 'timestamp': '2025-09-30 22:19:34.553743', 'step': 6124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:34.586580', 'step': 6124, 'epoch': 1} {'type': 'loss', 'content': 0.23661968111991882, 'timestamp': '2025-09-30 22:19:34.590508', 'step': 6125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.625476', 'step': 6125, 'epoch': 1} {'type': 'loss', 'content': 0.18682454526424408, 'timestamp': '2025-09-30 22:19:34.646888', 'step': 6126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:34.682272', 'step': 6126, 'epoch': 1} {'type': 'loss', 'content': 0.08929980546236038, 'timestamp': '2025-09-30 22:19:34.687493', 'step': 6127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:34.720620', 'step': 6127, 'epoch': 1} {'type': 'loss', 'content': 0.19877482950687408, 'timestamp': '2025-09-30 22:19:34.749170', 'step': 6128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.783102', 'step': 6128, 'epoch': 1} {'type': 'loss', 'content': 0.1052585244178772, 'timestamp': '2025-09-30 22:19:34.788412', 'step': 6129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.820780', 'step': 6129, 'epoch': 1} {'type': 'loss', 'content': 0.16566847264766693, 'timestamp': '2025-09-30 22:19:34.823917', 'step': 6130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:34.860024', 'step': 6130, 'epoch': 1} {'type': 'loss', 'content': 0.10457183420658112, 'timestamp': '2025-09-30 22:19:34.866047', 'step': 6131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:34.900532', 'step': 6131, 'epoch': 1} {'type': 'loss', 'content': 0.29458972811698914, 'timestamp': '2025-09-30 22:19:34.930189', 'step': 6132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:34.960585', 'step': 6132, 'epoch': 1} {'type': 'loss', 'content': 0.13014096021652222, 'timestamp': '2025-09-30 22:19:34.965741', 'step': 6133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:34.999166', 'step': 6133, 'epoch': 1} {'type': 'loss', 'content': 0.11362092196941376, 'timestamp': '2025-09-30 22:19:35.013880', 'step': 6134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.044100', 'step': 6134, 'epoch': 1} {'type': 'loss', 'content': 0.18361927568912506, 'timestamp': '2025-09-30 22:19:35.049523', 'step': 6135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:35.081054', 'step': 6135, 'epoch': 1} {'type': 'loss', 'content': 0.1189500018954277, 'timestamp': '2025-09-30 22:19:35.106566', 'step': 6136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:35.140726', 'step': 6136, 'epoch': 1} {'type': 'loss', 'content': 0.148863285779953, 'timestamp': '2025-09-30 22:19:35.147498', 'step': 6137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.180803', 'step': 6137, 'epoch': 1} {'type': 'loss', 'content': 0.22190850973129272, 'timestamp': '2025-09-30 22:19:35.186929', 'step': 6138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.220798', 'step': 6138, 'epoch': 1} {'type': 'loss', 'content': 0.23529303073883057, 'timestamp': '2025-09-30 22:19:35.223667', 'step': 6139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.260257', 'step': 6139, 'epoch': 1} {'type': 'loss', 'content': 0.0904989019036293, 'timestamp': '2025-09-30 22:19:35.287274', 'step': 6140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.321878', 'step': 6140, 'epoch': 1} {'type': 'loss', 'content': 0.15773729979991913, 'timestamp': '2025-09-30 22:19:35.328171', 'step': 6141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.359700', 'step': 6141, 'epoch': 1} {'type': 'loss', 'content': 0.2537737190723419, 'timestamp': '2025-09-30 22:19:35.365248', 'step': 6142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.398958', 'step': 6142, 'epoch': 1} {'type': 'loss', 'content': 0.17757034301757812, 'timestamp': '2025-09-30 22:19:35.401488', 'step': 6143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.436993', 'step': 6143, 'epoch': 1} {'type': 'loss', 'content': 0.09296100586652756, 'timestamp': '2025-09-30 22:19:35.461131', 'step': 6144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.494795', 'step': 6144, 'epoch': 1} {'type': 'loss', 'content': 0.22766482830047607, 'timestamp': '2025-09-30 22:19:35.497462', 'step': 6145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.527961', 'step': 6145, 'epoch': 1} {'type': 'loss', 'content': 0.12989641726016998, 'timestamp': '2025-09-30 22:19:35.530556', 'step': 6146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.564964', 'step': 6146, 'epoch': 1} {'type': 'loss', 'content': 0.172190859913826, 'timestamp': '2025-09-30 22:19:35.567655', 'step': 6147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.598458', 'step': 6147, 'epoch': 1} {'type': 'loss', 'content': 0.16600657999515533, 'timestamp': '2025-09-30 22:19:35.625854', 'step': 6148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.665196', 'step': 6148, 'epoch': 1} {'type': 'loss', 'content': 0.13098788261413574, 'timestamp': '2025-09-30 22:19:35.671231', 'step': 6149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:35.704895', 'step': 6149, 'epoch': 1} {'type': 'loss', 'content': 0.15555113554000854, 'timestamp': '2025-09-30 22:19:35.708468', 'step': 6150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.741262', 'step': 6150, 'epoch': 1} {'type': 'loss', 'content': 0.20809750258922577, 'timestamp': '2025-09-30 22:19:35.748524', 'step': 6151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:35.786683', 'step': 6151, 'epoch': 1} {'type': 'loss', 'content': 0.16531415283679962, 'timestamp': '2025-09-30 22:19:35.811492', 'step': 6152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.853154', 'step': 6152, 'epoch': 1} {'type': 'loss', 'content': 0.25089699029922485, 'timestamp': '2025-09-30 22:19:35.856138', 'step': 6153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:35.887064', 'step': 6153, 'epoch': 1} {'type': 'loss', 'content': 0.14674092829227448, 'timestamp': '2025-09-30 22:19:35.894947', 'step': 6154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:35.928013', 'step': 6154, 'epoch': 1} {'type': 'loss', 'content': 0.18343281745910645, 'timestamp': '2025-09-30 22:19:35.936081', 'step': 6155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:35.968112', 'step': 6155, 'epoch': 1} {'type': 'loss', 'content': 0.21616654098033905, 'timestamp': '2025-09-30 22:19:35.996915', 'step': 6156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:36.030262', 'step': 6156, 'epoch': 1} {'type': 'loss', 'content': 0.151100754737854, 'timestamp': '2025-09-30 22:19:36.047654', 'step': 6157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:36.078522', 'step': 6157, 'epoch': 1} {'type': 'loss', 'content': 0.3002910614013672, 'timestamp': '2025-09-30 22:19:36.087135', 'step': 6158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.124842', 'step': 6158, 'epoch': 1} {'type': 'loss', 'content': 0.18394474685192108, 'timestamp': '2025-09-30 22:19:36.130947', 'step': 6159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.162265', 'step': 6159, 'epoch': 1} {'type': 'loss', 'content': 0.10547322034835815, 'timestamp': '2025-09-30 22:19:36.189898', 'step': 6160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.226159', 'step': 6160, 'epoch': 1} {'type': 'loss', 'content': 0.09260398149490356, 'timestamp': '2025-09-30 22:19:36.231955', 'step': 6161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.264884', 'step': 6161, 'epoch': 1} {'type': 'loss', 'content': 0.1474032998085022, 'timestamp': '2025-09-30 22:19:36.271529', 'step': 6162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.304975', 'step': 6162, 'epoch': 1} {'type': 'loss', 'content': 0.15506862103939056, 'timestamp': '2025-09-30 22:19:36.310755', 'step': 6163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.341269', 'step': 6163, 'epoch': 1} {'type': 'loss', 'content': 0.16059570014476776, 'timestamp': '2025-09-30 22:19:36.370903', 'step': 6164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.410542', 'step': 6164, 'epoch': 1} {'type': 'loss', 'content': 0.1768283247947693, 'timestamp': '2025-09-30 22:19:36.413024', 'step': 6165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.445584', 'step': 6165, 'epoch': 1} {'type': 'loss', 'content': 0.08305448293685913, 'timestamp': '2025-09-30 22:19:36.448254', 'step': 6166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.481653', 'step': 6166, 'epoch': 1} {'type': 'loss', 'content': 0.1633967012166977, 'timestamp': '2025-09-30 22:19:36.486998', 'step': 6167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:36.520874', 'step': 6167, 'epoch': 1} {'type': 'loss', 'content': 0.12633542716503143, 'timestamp': '2025-09-30 22:19:36.549191', 'step': 6168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.586472', 'step': 6168, 'epoch': 1} {'type': 'loss', 'content': 0.1742490530014038, 'timestamp': '2025-09-30 22:19:36.592592', 'step': 6169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.624595', 'step': 6169, 'epoch': 1} {'type': 'loss', 'content': 0.11362557113170624, 'timestamp': '2025-09-30 22:19:36.628651', 'step': 6170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:36.663956', 'step': 6170, 'epoch': 1} {'type': 'loss', 'content': 0.15890935063362122, 'timestamp': '2025-09-30 22:19:36.668880', 'step': 6171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.699423', 'step': 6171, 'epoch': 1} {'type': 'loss', 'content': 0.12117747962474823, 'timestamp': '2025-09-30 22:19:36.723095', 'step': 6172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.752989', 'step': 6172, 'epoch': 1} {'type': 'loss', 'content': 0.10675764083862305, 'timestamp': '2025-09-30 22:19:36.763028', 'step': 6173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.797019', 'step': 6173, 'epoch': 1} {'type': 'loss', 'content': 0.21253187954425812, 'timestamp': '2025-09-30 22:19:36.801418', 'step': 6174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.833401', 'step': 6174, 'epoch': 1} {'type': 'loss', 'content': 0.15820086002349854, 'timestamp': '2025-09-30 22:19:36.836128', 'step': 6175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.869259', 'step': 6175, 'epoch': 1} {'type': 'loss', 'content': 0.1339167058467865, 'timestamp': '2025-09-30 22:19:36.893711', 'step': 6176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.927033', 'step': 6176, 'epoch': 1} {'type': 'loss', 'content': 0.13380897045135498, 'timestamp': '2025-09-30 22:19:36.929717', 'step': 6177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:36.962497', 'step': 6177, 'epoch': 1} {'type': 'loss', 'content': 0.21218067407608032, 'timestamp': '2025-09-30 22:19:36.965281', 'step': 6178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:36.999192', 'step': 6178, 'epoch': 1} {'type': 'loss', 'content': 0.12185809016227722, 'timestamp': '2025-09-30 22:19:37.001877', 'step': 6179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:37.034384', 'step': 6179, 'epoch': 1} {'type': 'loss', 'content': 0.22818614542484283, 'timestamp': '2025-09-30 22:19:37.058267', 'step': 6180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:37.107862', 'step': 6180, 'epoch': 1} {'type': 'loss', 'content': 0.12564267218112946, 'timestamp': '2025-09-30 22:19:37.110405', 'step': 6181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:37.142396', 'step': 6181, 'epoch': 1} {'type': 'loss', 'content': 0.12216918915510178, 'timestamp': '2025-09-30 22:19:37.147537', 'step': 6182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.178694', 'step': 6182, 'epoch': 1} {'type': 'loss', 'content': 0.15255874395370483, 'timestamp': '2025-09-30 22:19:37.184058', 'step': 6183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.216512', 'step': 6183, 'epoch': 1} {'type': 'loss', 'content': 0.1722448468208313, 'timestamp': '2025-09-30 22:19:37.243895', 'step': 6184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:37.277553', 'step': 6184, 'epoch': 1} {'type': 'loss', 'content': 0.156819149851799, 'timestamp': '2025-09-30 22:19:37.284076', 'step': 6185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:37.318316', 'step': 6185, 'epoch': 1} {'type': 'loss', 'content': 0.10826032608747482, 'timestamp': '2025-09-30 22:19:37.322693', 'step': 6186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.356509', 'step': 6186, 'epoch': 1} {'type': 'loss', 'content': 0.2004883587360382, 'timestamp': '2025-09-30 22:19:37.363428', 'step': 6187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.400866', 'step': 6187, 'epoch': 1} {'type': 'loss', 'content': 0.12149246037006378, 'timestamp': '2025-09-30 22:19:37.431100', 'step': 6188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:37.467143', 'step': 6188, 'epoch': 1} {'type': 'loss', 'content': 0.06614992022514343, 'timestamp': '2025-09-30 22:19:37.472873', 'step': 6189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:37.507361', 'step': 6189, 'epoch': 1} {'type': 'loss', 'content': 0.2081310749053955, 'timestamp': '2025-09-30 22:19:37.516029', 'step': 6190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:37.550184', 'step': 6190, 'epoch': 1} {'type': 'loss', 'content': 0.09243801236152649, 'timestamp': '2025-09-30 22:19:37.552724', 'step': 6191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:37.587639', 'step': 6191, 'epoch': 1} {'type': 'loss', 'content': 0.2229219526052475, 'timestamp': '2025-09-30 22:19:37.613168', 'step': 6192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:37.649235', 'step': 6192, 'epoch': 1} {'type': 'loss', 'content': 0.22058157622814178, 'timestamp': '2025-09-30 22:19:37.655538', 'step': 6193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.690314', 'step': 6193, 'epoch': 1} {'type': 'loss', 'content': 0.19423474371433258, 'timestamp': '2025-09-30 22:19:37.696856', 'step': 6194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:37.727155', 'step': 6194, 'epoch': 1} {'type': 'loss', 'content': 0.16185849905014038, 'timestamp': '2025-09-30 22:19:37.730735', 'step': 6195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.761149', 'step': 6195, 'epoch': 1} {'type': 'loss', 'content': 0.09704943001270294, 'timestamp': '2025-09-30 22:19:37.786341', 'step': 6196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.820983', 'step': 6196, 'epoch': 1} {'type': 'loss', 'content': 0.17577050626277924, 'timestamp': '2025-09-30 22:19:37.840349', 'step': 6197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:37.874653', 'step': 6197, 'epoch': 1} {'type': 'loss', 'content': 0.10827650874853134, 'timestamp': '2025-09-30 22:19:37.881074', 'step': 6198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:37.914427', 'step': 6198, 'epoch': 1} {'type': 'loss', 'content': 0.10375846177339554, 'timestamp': '2025-09-30 22:19:37.917759', 'step': 6199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:37.949849', 'step': 6199, 'epoch': 1} {'type': 'loss', 'content': 0.10408029705286026, 'timestamp': '2025-09-30 22:19:37.977818', 'step': 6200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.015044', 'step': 6200, 'epoch': 1} {'type': 'loss', 'content': 0.22326816618442535, 'timestamp': '2025-09-30 22:19:38.024227', 'step': 6201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:38.066472', 'step': 6201, 'epoch': 1} {'type': 'loss', 'content': 0.1398983746767044, 'timestamp': '2025-09-30 22:19:38.070614', 'step': 6202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.104600', 'step': 6202, 'epoch': 1} {'type': 'loss', 'content': 0.15496785938739777, 'timestamp': '2025-09-30 22:19:38.107060', 'step': 6203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:38.144312', 'step': 6203, 'epoch': 1} {'type': 'loss', 'content': 0.17540724575519562, 'timestamp': '2025-09-30 22:19:38.172814', 'step': 6204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:38.202976', 'step': 6204, 'epoch': 1} {'type': 'loss', 'content': 0.13060900568962097, 'timestamp': '2025-09-30 22:19:38.208793', 'step': 6205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:38.242617', 'step': 6205, 'epoch': 1} {'type': 'loss', 'content': 0.2534894347190857, 'timestamp': '2025-09-30 22:19:38.254233', 'step': 6206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.292836', 'step': 6206, 'epoch': 1} {'type': 'loss', 'content': 0.12580034136772156, 'timestamp': '2025-09-30 22:19:38.296141', 'step': 6207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:38.334461', 'step': 6207, 'epoch': 1} {'type': 'loss', 'content': 0.12815657258033752, 'timestamp': '2025-09-30 22:19:38.367197', 'step': 6208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:38.405908', 'step': 6208, 'epoch': 1} {'type': 'loss', 'content': 0.19438554346561432, 'timestamp': '2025-09-30 22:19:38.409040', 'step': 6209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:38.453553', 'step': 6209, 'epoch': 1} {'type': 'loss', 'content': 0.1851351112127304, 'timestamp': '2025-09-30 22:19:38.456979', 'step': 6210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.498326', 'step': 6210, 'epoch': 1} {'type': 'loss', 'content': 0.19232136011123657, 'timestamp': '2025-09-30 22:19:38.509762', 'step': 6211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:38.544365', 'step': 6211, 'epoch': 1} {'type': 'loss', 'content': 0.15680105984210968, 'timestamp': '2025-09-30 22:19:38.574448', 'step': 6212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.611676', 'step': 6212, 'epoch': 1} {'type': 'loss', 'content': 0.16632214188575745, 'timestamp': '2025-09-30 22:19:38.618385', 'step': 6213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:38.662268', 'step': 6213, 'epoch': 1} {'type': 'loss', 'content': 0.12661844491958618, 'timestamp': '2025-09-30 22:19:38.664868', 'step': 6214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.700870', 'step': 6214, 'epoch': 1} {'type': 'loss', 'content': 0.0917142927646637, 'timestamp': '2025-09-30 22:19:38.704611', 'step': 6215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:38.739727', 'step': 6215, 'epoch': 1} {'type': 'loss', 'content': 0.15851028263568878, 'timestamp': '2025-09-30 22:19:38.771603', 'step': 6216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.803864', 'step': 6216, 'epoch': 1} {'type': 'loss', 'content': 0.1606495976448059, 'timestamp': '2025-09-30 22:19:38.810900', 'step': 6217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:38.844256', 'step': 6217, 'epoch': 1} {'type': 'loss', 'content': 0.2567257583141327, 'timestamp': '2025-09-30 22:19:38.851859', 'step': 6218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:38.889922', 'step': 6218, 'epoch': 1} {'type': 'loss', 'content': 0.33202505111694336, 'timestamp': '2025-09-30 22:19:38.896170', 'step': 6219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:38.927809', 'step': 6219, 'epoch': 1} {'type': 'loss', 'content': 0.11982075124979019, 'timestamp': '2025-09-30 22:19:38.957437', 'step': 6220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:38.992314', 'step': 6220, 'epoch': 1} {'type': 'loss', 'content': 0.13345453143119812, 'timestamp': '2025-09-30 22:19:38.995047', 'step': 6221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:39.026060', 'step': 6221, 'epoch': 1} {'type': 'loss', 'content': 0.13690413534641266, 'timestamp': '2025-09-30 22:19:39.029703', 'step': 6222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.064460', 'step': 6222, 'epoch': 1} {'type': 'loss', 'content': 0.11781442910432816, 'timestamp': '2025-09-30 22:19:39.071508', 'step': 6223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.109884', 'step': 6223, 'epoch': 1} {'type': 'loss', 'content': 0.11036937683820724, 'timestamp': '2025-09-30 22:19:39.134796', 'step': 6224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.166359', 'step': 6224, 'epoch': 1} {'type': 'loss', 'content': 0.09478136897087097, 'timestamp': '2025-09-30 22:19:39.169495', 'step': 6225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.200446', 'step': 6225, 'epoch': 1} {'type': 'loss', 'content': 0.15097671747207642, 'timestamp': '2025-09-30 22:19:39.207784', 'step': 6226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.241362', 'step': 6226, 'epoch': 1} {'type': 'loss', 'content': 0.21552661061286926, 'timestamp': '2025-09-30 22:19:39.245157', 'step': 6227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.281643', 'step': 6227, 'epoch': 1} {'type': 'loss', 'content': 0.2837596833705902, 'timestamp': '2025-09-30 22:19:39.306696', 'step': 6228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.352343', 'step': 6228, 'epoch': 1} {'type': 'loss', 'content': 0.12201982736587524, 'timestamp': '2025-09-30 22:19:39.360977', 'step': 6229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.392206', 'step': 6229, 'epoch': 1} {'type': 'loss', 'content': 0.1449204981327057, 'timestamp': '2025-09-30 22:19:39.394818', 'step': 6230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:39.432608', 'step': 6230, 'epoch': 1} {'type': 'loss', 'content': 0.1153552234172821, 'timestamp': '2025-09-30 22:19:39.440201', 'step': 6231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.477312', 'step': 6231, 'epoch': 1} {'type': 'loss', 'content': 0.25184717774391174, 'timestamp': '2025-09-30 22:19:39.501369', 'step': 6232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.532797', 'step': 6232, 'epoch': 1} {'type': 'loss', 'content': 0.10393825173377991, 'timestamp': '2025-09-30 22:19:39.535788', 'step': 6233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.571770', 'step': 6233, 'epoch': 1} {'type': 'loss', 'content': 0.1490902304649353, 'timestamp': '2025-09-30 22:19:39.574606', 'step': 6234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:39.605322', 'step': 6234, 'epoch': 1} {'type': 'loss', 'content': 0.17251282930374146, 'timestamp': '2025-09-30 22:19:39.608644', 'step': 6235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.646147', 'step': 6235, 'epoch': 1} {'type': 'loss', 'content': 0.13932402431964874, 'timestamp': '2025-09-30 22:19:39.677411', 'step': 6236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.709629', 'step': 6236, 'epoch': 1} {'type': 'loss', 'content': 0.13708873093128204, 'timestamp': '2025-09-30 22:19:39.712249', 'step': 6237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:39.745033', 'step': 6237, 'epoch': 1} {'type': 'loss', 'content': 0.09068703651428223, 'timestamp': '2025-09-30 22:19:39.758398', 'step': 6238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:39.790151', 'step': 6238, 'epoch': 1} {'type': 'loss', 'content': 0.25575122237205505, 'timestamp': '2025-09-30 22:19:39.803387', 'step': 6239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.844075', 'step': 6239, 'epoch': 1} {'type': 'loss', 'content': 0.18758900463581085, 'timestamp': '2025-09-30 22:19:39.868704', 'step': 6240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:39.900071', 'step': 6240, 'epoch': 1} {'type': 'loss', 'content': 0.16292594373226166, 'timestamp': '2025-09-30 22:19:39.904454', 'step': 6241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:39.937963', 'step': 6241, 'epoch': 1} {'type': 'loss', 'content': 0.2361636459827423, 'timestamp': '2025-09-30 22:19:39.951525', 'step': 6242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:39.988731', 'step': 6242, 'epoch': 1} {'type': 'loss', 'content': 0.13637083768844604, 'timestamp': '2025-09-30 22:19:39.996776', 'step': 6243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:40.027635', 'step': 6243, 'epoch': 1} {'type': 'loss', 'content': 0.17029763758182526, 'timestamp': '2025-09-30 22:19:40.059007', 'step': 6244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:40.090040', 'step': 6244, 'epoch': 1} {'type': 'loss', 'content': 0.052812833338975906, 'timestamp': '2025-09-30 22:19:40.100492', 'step': 6245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:40.139518', 'step': 6245, 'epoch': 1} {'type': 'loss', 'content': 0.11021263152360916, 'timestamp': '2025-09-30 22:19:40.160077', 'step': 6246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:40.204834', 'step': 6246, 'epoch': 1} {'type': 'loss', 'content': 0.22538483142852783, 'timestamp': '2025-09-30 22:19:40.208686', 'step': 6247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.257259', 'step': 6247, 'epoch': 1} {'type': 'loss', 'content': 0.12723049521446228, 'timestamp': '2025-09-30 22:19:40.282303', 'step': 6248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:40.319902', 'step': 6248, 'epoch': 1} {'type': 'loss', 'content': 0.15481501817703247, 'timestamp': '2025-09-30 22:19:40.330001', 'step': 6249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.362109', 'step': 6249, 'epoch': 1} {'type': 'loss', 'content': 0.18975242972373962, 'timestamp': '2025-09-30 22:19:40.365226', 'step': 6250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.403156', 'step': 6250, 'epoch': 1} {'type': 'loss', 'content': 0.1277397722005844, 'timestamp': '2025-09-30 22:19:40.414790', 'step': 6251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:40.445259', 'step': 6251, 'epoch': 1} {'type': 'loss', 'content': 0.11098997294902802, 'timestamp': '2025-09-30 22:19:40.471020', 'step': 6252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:40.502126', 'step': 6252, 'epoch': 1} {'type': 'loss', 'content': 0.09134247153997421, 'timestamp': '2025-09-30 22:19:40.505356', 'step': 6253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.536800', 'step': 6253, 'epoch': 1} {'type': 'loss', 'content': 0.22700971364974976, 'timestamp': '2025-09-30 22:19:40.540456', 'step': 6254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:40.571857', 'step': 6254, 'epoch': 1} {'type': 'loss', 'content': 0.29813823103904724, 'timestamp': '2025-09-30 22:19:40.583162', 'step': 6255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:19:40.623829', 'step': 6255, 'epoch': 1} {'type': 'loss', 'content': 0.12797164916992188, 'timestamp': '2025-09-30 22:19:40.649146', 'step': 6256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:40.701019', 'step': 6256, 'epoch': 1} {'type': 'loss', 'content': 0.11988905072212219, 'timestamp': '2025-09-30 22:19:40.712810', 'step': 6257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:40.758300', 'step': 6257, 'epoch': 1} {'type': 'loss', 'content': 0.13283509016036987, 'timestamp': '2025-09-30 22:19:40.770793', 'step': 6258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:19:40.803158', 'step': 6258, 'epoch': 1} {'type': 'loss', 'content': 0.1852605938911438, 'timestamp': '2025-09-30 22:19:40.818239', 'step': 6259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.859342', 'step': 6259, 'epoch': 1} {'type': 'loss', 'content': 0.17788323760032654, 'timestamp': '2025-09-30 22:19:40.884074', 'step': 6260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.915532', 'step': 6260, 'epoch': 1} {'type': 'loss', 'content': 0.07625849545001984, 'timestamp': '2025-09-30 22:19:40.919129', 'step': 6261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:40.961916', 'step': 6261, 'epoch': 1} {'type': 'loss', 'content': 0.10727504640817642, 'timestamp': '2025-09-30 22:19:40.965555', 'step': 6262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:40.998848', 'step': 6262, 'epoch': 1} {'type': 'loss', 'content': 0.1560211032629013, 'timestamp': '2025-09-30 22:19:41.002006', 'step': 6263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:41.034603', 'step': 6263, 'epoch': 1} {'type': 'loss', 'content': 0.1430068165063858, 'timestamp': '2025-09-30 22:19:41.059154', 'step': 6264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.092159', 'step': 6264, 'epoch': 1} {'type': 'loss', 'content': 0.1345624476671219, 'timestamp': '2025-09-30 22:19:41.094842', 'step': 6265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:41.136577', 'step': 6265, 'epoch': 1} {'type': 'loss', 'content': 0.20552189648151398, 'timestamp': '2025-09-30 22:19:41.139894', 'step': 6266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.181903', 'step': 6266, 'epoch': 1} {'type': 'loss', 'content': 0.1491750329732895, 'timestamp': '2025-09-30 22:19:41.184610', 'step': 6267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.216318', 'step': 6267, 'epoch': 1} {'type': 'loss', 'content': 0.16158947348594666, 'timestamp': '2025-09-30 22:19:41.241815', 'step': 6268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.288309', 'step': 6268, 'epoch': 1} {'type': 'loss', 'content': 0.16289131343364716, 'timestamp': '2025-09-30 22:19:41.295836', 'step': 6269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:41.347822', 'step': 6269, 'epoch': 1} {'type': 'loss', 'content': 0.15517835319042206, 'timestamp': '2025-09-30 22:19:41.355747', 'step': 6270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:41.392433', 'step': 6270, 'epoch': 1} {'type': 'loss', 'content': 0.20155450701713562, 'timestamp': '2025-09-30 22:19:41.402409', 'step': 6271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.436078', 'step': 6271, 'epoch': 1} {'type': 'loss', 'content': 0.14319300651550293, 'timestamp': '2025-09-30 22:19:41.467162', 'step': 6272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:41.499646', 'step': 6272, 'epoch': 1} {'type': 'loss', 'content': 0.11611945182085037, 'timestamp': '2025-09-30 22:19:41.502453', 'step': 6273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:41.544873', 'step': 6273, 'epoch': 1} {'type': 'loss', 'content': 0.1639612764120102, 'timestamp': '2025-09-30 22:19:41.548756', 'step': 6274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:41.580238', 'step': 6274, 'epoch': 1} {'type': 'loss', 'content': 0.13423746824264526, 'timestamp': '2025-09-30 22:19:41.594676', 'step': 6275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.663008', 'step': 6275, 'epoch': 1} {'type': 'loss', 'content': 0.224620059132576, 'timestamp': '2025-09-30 22:19:41.696611', 'step': 6276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:41.733885', 'step': 6276, 'epoch': 1} {'type': 'loss', 'content': 0.13794121146202087, 'timestamp': '2025-09-30 22:19:41.737740', 'step': 6277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.770049', 'step': 6277, 'epoch': 1} {'type': 'loss', 'content': 0.2054423689842224, 'timestamp': '2025-09-30 22:19:41.780936', 'step': 6278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:41.813083', 'step': 6278, 'epoch': 1} {'type': 'loss', 'content': 0.19049479067325592, 'timestamp': '2025-09-30 22:19:41.815638', 'step': 6279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:41.851913', 'step': 6279, 'epoch': 1} {'type': 'loss', 'content': 0.13420243561267853, 'timestamp': '2025-09-30 22:19:41.884862', 'step': 6280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:41.923740', 'step': 6280, 'epoch': 1} {'type': 'loss', 'content': 0.15930713713169098, 'timestamp': '2025-09-30 22:19:41.926753', 'step': 6281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:41.958057', 'step': 6281, 'epoch': 1} {'type': 'loss', 'content': 0.17550653219223022, 'timestamp': '2025-09-30 22:19:41.960996', 'step': 6282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:41.994688', 'step': 6282, 'epoch': 1} {'type': 'loss', 'content': 0.3049207031726837, 'timestamp': '2025-09-30 22:19:42.004818', 'step': 6283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:42.045001', 'step': 6283, 'epoch': 1} {'type': 'loss', 'content': 0.14591722190380096, 'timestamp': '2025-09-30 22:19:42.078194', 'step': 6284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:42.109706', 'step': 6284, 'epoch': 1} {'type': 'loss', 'content': 0.1567452847957611, 'timestamp': '2025-09-30 22:19:42.118980', 'step': 6285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:42.151193', 'step': 6285, 'epoch': 1} {'type': 'loss', 'content': 0.15733174979686737, 'timestamp': '2025-09-30 22:19:42.161914', 'step': 6286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:42.201558', 'step': 6286, 'epoch': 1} {'type': 'loss', 'content': 0.17330968379974365, 'timestamp': '2025-09-30 22:19:42.213244', 'step': 6287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:42.253604', 'step': 6287, 'epoch': 1} {'type': 'loss', 'content': 0.22221997380256653, 'timestamp': '2025-09-30 22:19:42.279010', 'step': 6288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:42.317722', 'step': 6288, 'epoch': 1} {'type': 'loss', 'content': 0.17912860214710236, 'timestamp': '2025-09-30 22:19:42.320828', 'step': 6289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:42.357163', 'step': 6289, 'epoch': 1} {'type': 'loss', 'content': 0.11191901564598083, 'timestamp': '2025-09-30 22:19:42.364790', 'step': 6290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:42.401386', 'step': 6290, 'epoch': 1} {'type': 'loss', 'content': 0.24648433923721313, 'timestamp': '2025-09-30 22:19:42.409100', 'step': 6291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:42.445052', 'step': 6291, 'epoch': 1} {'type': 'loss', 'content': 0.2406013458967209, 'timestamp': '2025-09-30 22:19:42.474163', 'step': 6292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:42.505579', 'step': 6292, 'epoch': 1} {'type': 'loss', 'content': 0.09273186326026917, 'timestamp': '2025-09-30 22:19:42.513417', 'step': 6293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:42.551059', 'step': 6293, 'epoch': 1} {'type': 'loss', 'content': 0.3265850245952606, 'timestamp': '2025-09-30 22:19:42.554253', 'step': 6294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:42.585115', 'step': 6294, 'epoch': 1} {'type': 'loss', 'content': 0.2088058739900589, 'timestamp': '2025-09-30 22:19:42.588396', 'step': 6295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:42.621221', 'step': 6295, 'epoch': 1} {'type': 'loss', 'content': 0.09467032551765442, 'timestamp': '2025-09-30 22:19:42.653232', 'step': 6296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:42.684169', 'step': 6296, 'epoch': 1} {'type': 'loss', 'content': 0.11540775746107101, 'timestamp': '2025-09-30 22:19:42.692573', 'step': 6297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:42.731435', 'step': 6297, 'epoch': 1} {'type': 'loss', 'content': 0.17798703908920288, 'timestamp': '2025-09-30 22:19:42.739416', 'step': 6298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:42.771397', 'step': 6298, 'epoch': 1} {'type': 'loss', 'content': 0.14227844774723053, 'timestamp': '2025-09-30 22:19:42.776848', 'step': 6299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:42.809438', 'step': 6299, 'epoch': 1} {'type': 'loss', 'content': 0.19191469252109528, 'timestamp': '2025-09-30 22:19:42.834853', 'step': 6300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:42.873964', 'step': 6300, 'epoch': 1} {'type': 'loss', 'content': 0.2064822018146515, 'timestamp': '2025-09-30 22:19:42.877241', 'step': 6301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:42.915494', 'step': 6301, 'epoch': 1} {'type': 'loss', 'content': 0.11436554044485092, 'timestamp': '2025-09-30 22:19:42.930436', 'step': 6302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:42.961826', 'step': 6302, 'epoch': 1} {'type': 'loss', 'content': 0.11132022738456726, 'timestamp': '2025-09-30 22:19:42.966354', 'step': 6303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:42.998412', 'step': 6303, 'epoch': 1} {'type': 'loss', 'content': 0.1511383205652237, 'timestamp': '2025-09-30 22:19:43.028936', 'step': 6304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:43.066941', 'step': 6304, 'epoch': 1} {'type': 'loss', 'content': 0.11599529534578323, 'timestamp': '2025-09-30 22:19:43.070517', 'step': 6305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:43.110193', 'step': 6305, 'epoch': 1} {'type': 'loss', 'content': 0.14186979830265045, 'timestamp': '2025-09-30 22:19:43.113604', 'step': 6306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:43.144502', 'step': 6306, 'epoch': 1} {'type': 'loss', 'content': 0.18615342676639557, 'timestamp': '2025-09-30 22:19:43.147678', 'step': 6307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:19:43.188103', 'step': 6307, 'epoch': 1} {'type': 'loss', 'content': 0.07592745870351791, 'timestamp': '2025-09-30 22:19:43.219600', 'step': 6308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:43.251440', 'step': 6308, 'epoch': 1} {'type': 'loss', 'content': 0.11253328621387482, 'timestamp': '2025-09-30 22:19:43.254849', 'step': 6309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:43.296186', 'step': 6309, 'epoch': 1} {'type': 'loss', 'content': 0.13719899952411652, 'timestamp': '2025-09-30 22:19:43.307358', 'step': 6310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.347922', 'step': 6310, 'epoch': 1} {'type': 'loss', 'content': 0.1354006677865982, 'timestamp': '2025-09-30 22:19:43.363226', 'step': 6311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.394078', 'step': 6311, 'epoch': 1} {'type': 'loss', 'content': 0.07905767112970352, 'timestamp': '2025-09-30 22:19:43.426210', 'step': 6312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:43.466007', 'step': 6312, 'epoch': 1} {'type': 'loss', 'content': 0.14504627883434296, 'timestamp': '2025-09-30 22:19:43.477408', 'step': 6313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:43.509447', 'step': 6313, 'epoch': 1} {'type': 'loss', 'content': 0.13175874948501587, 'timestamp': '2025-09-30 22:19:43.520888', 'step': 6314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.559290', 'step': 6314, 'epoch': 1} {'type': 'loss', 'content': 0.21889641880989075, 'timestamp': '2025-09-30 22:19:43.563999', 'step': 6315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:43.602582', 'step': 6315, 'epoch': 1} {'type': 'loss', 'content': 0.19788801670074463, 'timestamp': '2025-09-30 22:19:43.633516', 'step': 6316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.670119', 'step': 6316, 'epoch': 1} {'type': 'loss', 'content': 0.21275511384010315, 'timestamp': '2025-09-30 22:19:43.679664', 'step': 6317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:43.717564', 'step': 6317, 'epoch': 1} {'type': 'loss', 'content': 0.28670185804367065, 'timestamp': '2025-09-30 22:19:43.721381', 'step': 6318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.753343', 'step': 6318, 'epoch': 1} {'type': 'loss', 'content': 0.2664291262626648, 'timestamp': '2025-09-30 22:19:43.763237', 'step': 6319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:43.810101', 'step': 6319, 'epoch': 1} {'type': 'loss', 'content': 0.12600134313106537, 'timestamp': '2025-09-30 22:19:43.834415', 'step': 6320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.864712', 'step': 6320, 'epoch': 1} {'type': 'loss', 'content': 0.16379599273204803, 'timestamp': '2025-09-30 22:19:43.868833', 'step': 6321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:43.909582', 'step': 6321, 'epoch': 1} {'type': 'loss', 'content': 0.23839469254016876, 'timestamp': '2025-09-30 22:19:43.912862', 'step': 6322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:43.944396', 'step': 6322, 'epoch': 1} {'type': 'loss', 'content': 0.13028714060783386, 'timestamp': '2025-09-30 22:19:43.955916', 'step': 6323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:43.987717', 'step': 6323, 'epoch': 1} {'type': 'loss', 'content': 0.11301126331090927, 'timestamp': '2025-09-30 22:19:44.020275', 'step': 6324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:44.051476', 'step': 6324, 'epoch': 1} {'type': 'loss', 'content': 0.1713961958885193, 'timestamp': '2025-09-30 22:19:44.063164', 'step': 6325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:44.103593', 'step': 6325, 'epoch': 1} {'type': 'loss', 'content': 0.13120749592781067, 'timestamp': '2025-09-30 22:19:44.107497', 'step': 6326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:44.139085', 'step': 6326, 'epoch': 1} {'type': 'loss', 'content': 0.10707595199346542, 'timestamp': '2025-09-30 22:19:44.144727', 'step': 6327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.177213', 'step': 6327, 'epoch': 1} {'type': 'loss', 'content': 0.13127176463603973, 'timestamp': '2025-09-30 22:19:44.203049', 'step': 6328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:44.248774', 'step': 6328, 'epoch': 1} {'type': 'loss', 'content': 0.12716197967529297, 'timestamp': '2025-09-30 22:19:44.255450', 'step': 6329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.288246', 'step': 6329, 'epoch': 1} {'type': 'loss', 'content': 0.1294754296541214, 'timestamp': '2025-09-30 22:19:44.292744', 'step': 6330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.324575', 'step': 6330, 'epoch': 1} {'type': 'loss', 'content': 0.16150274872779846, 'timestamp': '2025-09-30 22:19:44.337342', 'step': 6331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.369898', 'step': 6331, 'epoch': 1} {'type': 'loss', 'content': 0.13354073464870453, 'timestamp': '2025-09-30 22:19:44.395255', 'step': 6332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:44.426070', 'step': 6332, 'epoch': 1} {'type': 'loss', 'content': 0.10015957057476044, 'timestamp': '2025-09-30 22:19:44.429108', 'step': 6333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:44.460327', 'step': 6333, 'epoch': 1} {'type': 'loss', 'content': 0.1471555531024933, 'timestamp': '2025-09-30 22:19:44.462951', 'step': 6334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.494890', 'step': 6334, 'epoch': 1} {'type': 'loss', 'content': 0.1202402114868164, 'timestamp': '2025-09-30 22:19:44.505072', 'step': 6335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.537034', 'step': 6335, 'epoch': 1} {'type': 'loss', 'content': 0.19422593712806702, 'timestamp': '2025-09-30 22:19:44.561115', 'step': 6336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:44.592194', 'step': 6336, 'epoch': 1} {'type': 'loss', 'content': 0.11402326077222824, 'timestamp': '2025-09-30 22:19:44.595605', 'step': 6337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:44.628260', 'step': 6337, 'epoch': 1} {'type': 'loss', 'content': 0.16115586459636688, 'timestamp': '2025-09-30 22:19:44.640814', 'step': 6338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:44.681027', 'step': 6338, 'epoch': 1} {'type': 'loss', 'content': 0.1826484203338623, 'timestamp': '2025-09-30 22:19:44.686387', 'step': 6339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.717597', 'step': 6339, 'epoch': 1} {'type': 'loss', 'content': 0.12822113931179047, 'timestamp': '2025-09-30 22:19:44.749893', 'step': 6340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:44.790588', 'step': 6340, 'epoch': 1} {'type': 'loss', 'content': 0.15178364515304565, 'timestamp': '2025-09-30 22:19:44.793344', 'step': 6341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:44.826776', 'step': 6341, 'epoch': 1} {'type': 'loss', 'content': 0.21481606364250183, 'timestamp': '2025-09-30 22:19:44.831517', 'step': 6342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:44.863963', 'step': 6342, 'epoch': 1} {'type': 'loss', 'content': 0.22709457576274872, 'timestamp': '2025-09-30 22:19:44.877934', 'step': 6343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:44.910295', 'step': 6343, 'epoch': 1} {'type': 'loss', 'content': 0.1394752711057663, 'timestamp': '2025-09-30 22:19:44.936647', 'step': 6344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:44.967706', 'step': 6344, 'epoch': 1} {'type': 'loss', 'content': 0.20033441483974457, 'timestamp': '2025-09-30 22:19:44.971087', 'step': 6345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.001300', 'step': 6345, 'epoch': 1} {'type': 'loss', 'content': 0.12762102484703064, 'timestamp': '2025-09-30 22:19:45.011717', 'step': 6346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.052041', 'step': 6346, 'epoch': 1} {'type': 'loss', 'content': 0.11911246925592422, 'timestamp': '2025-09-30 22:19:45.054692', 'step': 6347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:45.092539', 'step': 6347, 'epoch': 1} {'type': 'loss', 'content': 0.12763622403144836, 'timestamp': '2025-09-30 22:19:45.116917', 'step': 6348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:45.157003', 'step': 6348, 'epoch': 1} {'type': 'loss', 'content': 0.1339023858308792, 'timestamp': '2025-09-30 22:19:45.171118', 'step': 6349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.202327', 'step': 6349, 'epoch': 1} {'type': 'loss', 'content': 0.17874033749103546, 'timestamp': '2025-09-30 22:19:45.217929', 'step': 6350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:45.255158', 'step': 6350, 'epoch': 1} {'type': 'loss', 'content': 0.1423540562391281, 'timestamp': '2025-09-30 22:19:45.259016', 'step': 6351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:45.290935', 'step': 6351, 'epoch': 1} {'type': 'loss', 'content': 0.14152227342128754, 'timestamp': '2025-09-30 22:19:45.324019', 'step': 6352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.364308', 'step': 6352, 'epoch': 1} {'type': 'loss', 'content': 0.14557750523090363, 'timestamp': '2025-09-30 22:19:45.367127', 'step': 6353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:45.403473', 'step': 6353, 'epoch': 1} {'type': 'loss', 'content': 0.14735554158687592, 'timestamp': '2025-09-30 22:19:45.406682', 'step': 6354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:45.438860', 'step': 6354, 'epoch': 1} {'type': 'loss', 'content': 0.1722574084997177, 'timestamp': '2025-09-30 22:19:45.456331', 'step': 6355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.487974', 'step': 6355, 'epoch': 1} {'type': 'loss', 'content': 0.15342603623867035, 'timestamp': '2025-09-30 22:19:45.526425', 'step': 6356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:45.570097', 'step': 6356, 'epoch': 1} {'type': 'loss', 'content': 0.14474710822105408, 'timestamp': '2025-09-30 22:19:45.575875', 'step': 6357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.624843', 'step': 6357, 'epoch': 1} {'type': 'loss', 'content': 0.15046431124210358, 'timestamp': '2025-09-30 22:19:45.628892', 'step': 6358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:45.667356', 'step': 6358, 'epoch': 1} {'type': 'loss', 'content': 0.12517079710960388, 'timestamp': '2025-09-30 22:19:45.678868', 'step': 6359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.711507', 'step': 6359, 'epoch': 1} {'type': 'loss', 'content': 0.25439101457595825, 'timestamp': '2025-09-30 22:19:45.737421', 'step': 6360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:45.777691', 'step': 6360, 'epoch': 1} {'type': 'loss', 'content': 0.09924668073654175, 'timestamp': '2025-09-30 22:19:45.781559', 'step': 6361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.823208', 'step': 6361, 'epoch': 1} {'type': 'loss', 'content': 0.13301636278629303, 'timestamp': '2025-09-30 22:19:45.835760', 'step': 6362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:45.877422', 'step': 6362, 'epoch': 1} {'type': 'loss', 'content': 0.12157852202653885, 'timestamp': '2025-09-30 22:19:45.883343', 'step': 6363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:45.914602', 'step': 6363, 'epoch': 1} {'type': 'loss', 'content': 0.21439702808856964, 'timestamp': '2025-09-30 22:19:45.940411', 'step': 6364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:45.975098', 'step': 6364, 'epoch': 1} {'type': 'loss', 'content': 0.18522034585475922, 'timestamp': '2025-09-30 22:19:45.977822', 'step': 6365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.009579', 'step': 6365, 'epoch': 1} {'type': 'loss', 'content': 0.13112427294254303, 'timestamp': '2025-09-30 22:19:46.022548', 'step': 6366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:46.056566', 'step': 6366, 'epoch': 1} {'type': 'loss', 'content': 0.10970168560743332, 'timestamp': '2025-09-30 22:19:46.059550', 'step': 6367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:46.091153', 'step': 6367, 'epoch': 1} {'type': 'loss', 'content': 0.12657149136066437, 'timestamp': '2025-09-30 22:19:46.124694', 'step': 6368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:46.156665', 'step': 6368, 'epoch': 1} {'type': 'loss', 'content': 0.168757364153862, 'timestamp': '2025-09-30 22:19:46.160335', 'step': 6369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:46.192328', 'step': 6369, 'epoch': 1} {'type': 'loss', 'content': 0.16828322410583496, 'timestamp': '2025-09-30 22:19:46.195199', 'step': 6370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.227335', 'step': 6370, 'epoch': 1} {'type': 'loss', 'content': 0.15136554837226868, 'timestamp': '2025-09-30 22:19:46.230354', 'step': 6371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.271100', 'step': 6371, 'epoch': 1} {'type': 'loss', 'content': 0.07809761166572571, 'timestamp': '2025-09-30 22:19:46.297185', 'step': 6372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.340158', 'step': 6372, 'epoch': 1} {'type': 'loss', 'content': 0.18387582898139954, 'timestamp': '2025-09-30 22:19:46.354701', 'step': 6373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:46.395566', 'step': 6373, 'epoch': 1} {'type': 'loss', 'content': 0.10127948969602585, 'timestamp': '2025-09-30 22:19:46.398381', 'step': 6374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:46.429568', 'step': 6374, 'epoch': 1} {'type': 'loss', 'content': 0.15531139075756073, 'timestamp': '2025-09-30 22:19:46.432734', 'step': 6375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:46.463603', 'step': 6375, 'epoch': 1} {'type': 'loss', 'content': 0.21594461798667908, 'timestamp': '2025-09-30 22:19:46.488803', 'step': 6376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:46.520870', 'step': 6376, 'epoch': 1} {'type': 'loss', 'content': 0.17531028389930725, 'timestamp': '2025-09-30 22:19:46.531758', 'step': 6377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.577453', 'step': 6377, 'epoch': 1} {'type': 'loss', 'content': 0.11644246429204941, 'timestamp': '2025-09-30 22:19:46.580884', 'step': 6378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:46.624017', 'step': 6378, 'epoch': 1} {'type': 'loss', 'content': 0.16423940658569336, 'timestamp': '2025-09-30 22:19:46.637009', 'step': 6379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:46.668254', 'step': 6379, 'epoch': 1} {'type': 'loss', 'content': 0.18308314681053162, 'timestamp': '2025-09-30 22:19:46.693335', 'step': 6380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.724804', 'step': 6380, 'epoch': 1} {'type': 'loss', 'content': 0.10699626058340073, 'timestamp': '2025-09-30 22:19:46.727880', 'step': 6381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:46.759824', 'step': 6381, 'epoch': 1} {'type': 'loss', 'content': 0.15552842617034912, 'timestamp': '2025-09-30 22:19:46.762877', 'step': 6382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:46.804071', 'step': 6382, 'epoch': 1} {'type': 'loss', 'content': 0.1252419799566269, 'timestamp': '2025-09-30 22:19:46.807082', 'step': 6383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:46.838869', 'step': 6383, 'epoch': 1} {'type': 'loss', 'content': 0.11018245667219162, 'timestamp': '2025-09-30 22:19:46.867743', 'step': 6384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.898879', 'step': 6384, 'epoch': 1} {'type': 'loss', 'content': 0.20256495475769043, 'timestamp': '2025-09-30 22:19:46.907537', 'step': 6385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:46.938385', 'step': 6385, 'epoch': 1} {'type': 'loss', 'content': 0.21510277688503265, 'timestamp': '2025-09-30 22:19:46.942095', 'step': 6386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:46.978599', 'step': 6386, 'epoch': 1} {'type': 'loss', 'content': 0.2173551470041275, 'timestamp': '2025-09-30 22:19:46.982143', 'step': 6387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.019948', 'step': 6387, 'epoch': 1} {'type': 'loss', 'content': 0.2083202451467514, 'timestamp': '2025-09-30 22:19:47.052976', 'step': 6388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.083901', 'step': 6388, 'epoch': 1} {'type': 'loss', 'content': 0.165815070271492, 'timestamp': '2025-09-30 22:19:47.087672', 'step': 6389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:47.126936', 'step': 6389, 'epoch': 1} {'type': 'loss', 'content': 0.13253498077392578, 'timestamp': '2025-09-30 22:19:47.131148', 'step': 6390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.172213', 'step': 6390, 'epoch': 1} {'type': 'loss', 'content': 0.11967296153306961, 'timestamp': '2025-09-30 22:19:47.176269', 'step': 6391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.207044', 'step': 6391, 'epoch': 1} {'type': 'loss', 'content': 0.23012393712997437, 'timestamp': '2025-09-30 22:19:47.240302', 'step': 6392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.273212', 'step': 6392, 'epoch': 1} {'type': 'loss', 'content': 0.13489845395088196, 'timestamp': '2025-09-30 22:19:47.283239', 'step': 6393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.322547', 'step': 6393, 'epoch': 1} {'type': 'loss', 'content': 0.17067736387252808, 'timestamp': '2025-09-30 22:19:47.326060', 'step': 6394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.359559', 'step': 6394, 'epoch': 1} {'type': 'loss', 'content': 0.15327735245227814, 'timestamp': '2025-09-30 22:19:47.370845', 'step': 6395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:47.410178', 'step': 6395, 'epoch': 1} {'type': 'loss', 'content': 0.20955508947372437, 'timestamp': '2025-09-30 22:19:47.435099', 'step': 6396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.470368', 'step': 6396, 'epoch': 1} {'type': 'loss', 'content': 0.21876315772533417, 'timestamp': '2025-09-30 22:19:47.476935', 'step': 6397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.513301', 'step': 6397, 'epoch': 1} {'type': 'loss', 'content': 0.17994427680969238, 'timestamp': '2025-09-30 22:19:47.519579', 'step': 6398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.551717', 'step': 6398, 'epoch': 1} {'type': 'loss', 'content': 0.19410212337970734, 'timestamp': '2025-09-30 22:19:47.554946', 'step': 6399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.594364', 'step': 6399, 'epoch': 1} {'type': 'loss', 'content': 0.11462949961423874, 'timestamp': '2025-09-30 22:19:47.624360', 'step': 6400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.655399', 'step': 6400, 'epoch': 1} {'type': 'loss', 'content': 0.18209554255008698, 'timestamp': '2025-09-30 22:19:47.658559', 'step': 6401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.689896', 'step': 6401, 'epoch': 1} {'type': 'loss', 'content': 0.17809297144412994, 'timestamp': '2025-09-30 22:19:47.695793', 'step': 6402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.729837', 'step': 6402, 'epoch': 1} {'type': 'loss', 'content': 0.1685343235731125, 'timestamp': '2025-09-30 22:19:47.732656', 'step': 6403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:47.765165', 'step': 6403, 'epoch': 1} {'type': 'loss', 'content': 0.12957990169525146, 'timestamp': '2025-09-30 22:19:47.795377', 'step': 6404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:47.825318', 'step': 6404, 'epoch': 1} {'type': 'loss', 'content': 0.21440817415714264, 'timestamp': '2025-09-30 22:19:47.835408', 'step': 6405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:47.871292', 'step': 6405, 'epoch': 1} {'type': 'loss', 'content': 0.07297654449939728, 'timestamp': '2025-09-30 22:19:47.880366', 'step': 6406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:47.916571', 'step': 6406, 'epoch': 1} {'type': 'loss', 'content': 0.17788687348365784, 'timestamp': '2025-09-30 22:19:47.924837', 'step': 6407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:47.956778', 'step': 6407, 'epoch': 1} {'type': 'loss', 'content': 0.11982687562704086, 'timestamp': '2025-09-30 22:19:47.985925', 'step': 6408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.015879', 'step': 6408, 'epoch': 1} {'type': 'loss', 'content': 0.12479006499052048, 'timestamp': '2025-09-30 22:19:48.022933', 'step': 6409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.060480', 'step': 6409, 'epoch': 1} {'type': 'loss', 'content': 0.13898278772830963, 'timestamp': '2025-09-30 22:19:48.064581', 'step': 6410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.104083', 'step': 6410, 'epoch': 1} {'type': 'loss', 'content': 0.08377791941165924, 'timestamp': '2025-09-30 22:19:48.115869', 'step': 6411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:48.146162', 'step': 6411, 'epoch': 1} {'type': 'loss', 'content': 0.14060184359550476, 'timestamp': '2025-09-30 22:19:48.170613', 'step': 6412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:19:48.210146', 'step': 6412, 'epoch': 1} {'type': 'loss', 'content': 0.11959313601255417, 'timestamp': '2025-09-30 22:19:48.221124', 'step': 6413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:48.251618', 'step': 6413, 'epoch': 1} {'type': 'loss', 'content': 0.17684367299079895, 'timestamp': '2025-09-30 22:19:48.254163', 'step': 6414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:48.284505', 'step': 6414, 'epoch': 1} {'type': 'loss', 'content': 0.1289043426513672, 'timestamp': '2025-09-30 22:19:48.291652', 'step': 6415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.326728', 'step': 6415, 'epoch': 1} {'type': 'loss', 'content': 0.13025443255901337, 'timestamp': '2025-09-30 22:19:48.353274', 'step': 6416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:48.387537', 'step': 6416, 'epoch': 1} {'type': 'loss', 'content': 0.13539506494998932, 'timestamp': '2025-09-30 22:19:48.390405', 'step': 6417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.426325', 'step': 6417, 'epoch': 1} {'type': 'loss', 'content': 0.08192888647317886, 'timestamp': '2025-09-30 22:19:48.433446', 'step': 6418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.468774', 'step': 6418, 'epoch': 1} {'type': 'loss', 'content': 0.18557801842689514, 'timestamp': '2025-09-30 22:19:48.473232', 'step': 6419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.506835', 'step': 6419, 'epoch': 1} {'type': 'loss', 'content': 0.17977632582187653, 'timestamp': '2025-09-30 22:19:48.535791', 'step': 6420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.565771', 'step': 6420, 'epoch': 1} {'type': 'loss', 'content': 0.15263906121253967, 'timestamp': '2025-09-30 22:19:48.575015', 'step': 6421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.610157', 'step': 6421, 'epoch': 1} {'type': 'loss', 'content': 0.2322545051574707, 'timestamp': '2025-09-30 22:19:48.616559', 'step': 6422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:48.651156', 'step': 6422, 'epoch': 1} {'type': 'loss', 'content': 0.1284438818693161, 'timestamp': '2025-09-30 22:19:48.659831', 'step': 6423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:48.696296', 'step': 6423, 'epoch': 1} {'type': 'loss', 'content': 0.10632894188165665, 'timestamp': '2025-09-30 22:19:48.726807', 'step': 6424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:48.763282', 'step': 6424, 'epoch': 1} {'type': 'loss', 'content': 0.0519857257604599, 'timestamp': '2025-09-30 22:19:48.769006', 'step': 6425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:48.802636', 'step': 6425, 'epoch': 1} {'type': 'loss', 'content': 0.11931384354829788, 'timestamp': '2025-09-30 22:19:48.804895', 'step': 6426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:48.835828', 'step': 6426, 'epoch': 1} {'type': 'loss', 'content': 0.1385829895734787, 'timestamp': '2025-09-30 22:19:48.842663', 'step': 6427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:48.873682', 'step': 6427, 'epoch': 1} {'type': 'loss', 'content': 0.20155154168605804, 'timestamp': '2025-09-30 22:19:48.900116', 'step': 6428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:48.930663', 'step': 6428, 'epoch': 1} {'type': 'loss', 'content': 0.12445227056741714, 'timestamp': '2025-09-30 22:19:48.935245', 'step': 6429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:48.971837', 'step': 6429, 'epoch': 1} {'type': 'loss', 'content': 0.16765393316745758, 'timestamp': '2025-09-30 22:19:48.979221', 'step': 6430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.010230', 'step': 6430, 'epoch': 1} {'type': 'loss', 'content': 0.15794110298156738, 'timestamp': '2025-09-30 22:19:49.015639', 'step': 6431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:49.050606', 'step': 6431, 'epoch': 1} {'type': 'loss', 'content': 0.18145474791526794, 'timestamp': '2025-09-30 22:19:49.074931', 'step': 6432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.106476', 'step': 6432, 'epoch': 1} {'type': 'loss', 'content': 0.14929120242595673, 'timestamp': '2025-09-30 22:19:49.119510', 'step': 6433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:49.150418', 'step': 6433, 'epoch': 1} {'type': 'loss', 'content': 0.08546648919582367, 'timestamp': '2025-09-30 22:19:49.162300', 'step': 6434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.192122', 'step': 6434, 'epoch': 1} {'type': 'loss', 'content': 0.18367378413677216, 'timestamp': '2025-09-30 22:19:49.194859', 'step': 6435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.230803', 'step': 6435, 'epoch': 1} {'type': 'loss', 'content': 0.1631300002336502, 'timestamp': '2025-09-30 22:19:49.255235', 'step': 6436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.290024', 'step': 6436, 'epoch': 1} {'type': 'loss', 'content': 0.17010381817817688, 'timestamp': '2025-09-30 22:19:49.293017', 'step': 6437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.326335', 'step': 6437, 'epoch': 1} {'type': 'loss', 'content': 0.13428308069705963, 'timestamp': '2025-09-30 22:19:49.331696', 'step': 6438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.368097', 'step': 6438, 'epoch': 1} {'type': 'loss', 'content': 0.12836025655269623, 'timestamp': '2025-09-30 22:19:49.372678', 'step': 6439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.406189', 'step': 6439, 'epoch': 1} {'type': 'loss', 'content': 0.15871983766555786, 'timestamp': '2025-09-30 22:19:49.429895', 'step': 6440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.459945', 'step': 6440, 'epoch': 1} {'type': 'loss', 'content': 0.15902167558670044, 'timestamp': '2025-09-30 22:19:49.465347', 'step': 6441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.497451', 'step': 6441, 'epoch': 1} {'type': 'loss', 'content': 0.14435945451259613, 'timestamp': '2025-09-30 22:19:49.502239', 'step': 6442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.534457', 'step': 6442, 'epoch': 1} {'type': 'loss', 'content': 0.07707183808088303, 'timestamp': '2025-09-30 22:19:49.539404', 'step': 6443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.571748', 'step': 6443, 'epoch': 1} {'type': 'loss', 'content': 0.16096486151218414, 'timestamp': '2025-09-30 22:19:49.596547', 'step': 6444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:49.630049', 'step': 6444, 'epoch': 1} {'type': 'loss', 'content': 0.1753205806016922, 'timestamp': '2025-09-30 22:19:49.636911', 'step': 6445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.669194', 'step': 6445, 'epoch': 1} {'type': 'loss', 'content': 0.2252691686153412, 'timestamp': '2025-09-30 22:19:49.674522', 'step': 6446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:49.706700', 'step': 6446, 'epoch': 1} {'type': 'loss', 'content': 0.2115807831287384, 'timestamp': '2025-09-30 22:19:49.711902', 'step': 6447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.745321', 'step': 6447, 'epoch': 1} {'type': 'loss', 'content': 0.23892837762832642, 'timestamp': '2025-09-30 22:19:49.769867', 'step': 6448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.803659', 'step': 6448, 'epoch': 1} {'type': 'loss', 'content': 0.14799357950687408, 'timestamp': '2025-09-30 22:19:49.808115', 'step': 6449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:49.838683', 'step': 6449, 'epoch': 1} {'type': 'loss', 'content': 0.10432092100381851, 'timestamp': '2025-09-30 22:19:49.843527', 'step': 6450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.874012', 'step': 6450, 'epoch': 1} {'type': 'loss', 'content': 0.11447962373495102, 'timestamp': '2025-09-30 22:19:49.878322', 'step': 6451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:49.911059', 'step': 6451, 'epoch': 1} {'type': 'loss', 'content': 0.13693475723266602, 'timestamp': '2025-09-30 22:19:49.945615', 'step': 6452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:49.979513', 'step': 6452, 'epoch': 1} {'type': 'loss', 'content': 0.14947152137756348, 'timestamp': '2025-09-30 22:19:49.982365', 'step': 6453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.016874', 'step': 6453, 'epoch': 1} {'type': 'loss', 'content': 0.13730311393737793, 'timestamp': '2025-09-30 22:19:50.026453', 'step': 6454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:50.061825', 'step': 6454, 'epoch': 1} {'type': 'loss', 'content': 0.13744965195655823, 'timestamp': '2025-09-30 22:19:50.064886', 'step': 6455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:50.094857', 'step': 6455, 'epoch': 1} {'type': 'loss', 'content': 0.14237625896930695, 'timestamp': '2025-09-30 22:19:50.118745', 'step': 6456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:50.150656', 'step': 6456, 'epoch': 1} {'type': 'loss', 'content': 0.13987679779529572, 'timestamp': '2025-09-30 22:19:50.153050', 'step': 6457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:50.186792', 'step': 6457, 'epoch': 1} {'type': 'loss', 'content': 0.08587423712015152, 'timestamp': '2025-09-30 22:19:50.188995', 'step': 6458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.219583', 'step': 6458, 'epoch': 1} {'type': 'loss', 'content': 0.17170366644859314, 'timestamp': '2025-09-30 22:19:50.224733', 'step': 6459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:50.256988', 'step': 6459, 'epoch': 1} {'type': 'loss', 'content': 0.1333879828453064, 'timestamp': '2025-09-30 22:19:50.293383', 'step': 6460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.324171', 'step': 6460, 'epoch': 1} {'type': 'loss', 'content': 0.15601548552513123, 'timestamp': '2025-09-30 22:19:50.329470', 'step': 6461, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:50.361092', 'step': 6461, 'epoch': 1} {'type': 'loss', 'content': 0.09537176787853241, 'timestamp': '2025-09-30 22:19:50.367128', 'step': 6462, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:50.402257', 'step': 6462, 'epoch': 1} {'type': 'loss', 'content': 0.208126038312912, 'timestamp': '2025-09-30 22:19:50.411776', 'step': 6463, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:50.444642', 'step': 6463, 'epoch': 1} {'type': 'loss', 'content': 0.183893620967865, 'timestamp': '2025-09-30 22:19:50.470725', 'step': 6464, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.507797', 'step': 6464, 'epoch': 1} {'type': 'loss', 'content': 0.18770411610603333, 'timestamp': '2025-09-30 22:19:50.513350', 'step': 6465, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.546218', 'step': 6465, 'epoch': 1} {'type': 'loss', 'content': 0.09314396232366562, 'timestamp': '2025-09-30 22:19:50.551505', 'step': 6466, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:50.581225', 'step': 6466, 'epoch': 1} {'type': 'loss', 'content': 0.10852377116680145, 'timestamp': '2025-09-30 22:19:50.586936', 'step': 6467, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.623737', 'step': 6467, 'epoch': 1} {'type': 'loss', 'content': 0.26547616720199585, 'timestamp': '2025-09-30 22:19:50.649832', 'step': 6468, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.681885', 'step': 6468, 'epoch': 1} {'type': 'loss', 'content': 0.15828905999660492, 'timestamp': '2025-09-30 22:19:50.684492', 'step': 6469, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.722330', 'step': 6469, 'epoch': 1} {'type': 'loss', 'content': 0.08851774036884308, 'timestamp': '2025-09-30 22:19:50.727795', 'step': 6470, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:50.761790', 'step': 6470, 'epoch': 1} {'type': 'loss', 'content': 0.15039414167404175, 'timestamp': '2025-09-30 22:19:50.766251', 'step': 6471, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.798336', 'step': 6471, 'epoch': 1} {'type': 'loss', 'content': 0.07802300155162811, 'timestamp': '2025-09-30 22:19:50.825259', 'step': 6472, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:50.856556', 'step': 6472, 'epoch': 1} {'type': 'loss', 'content': 0.10959552973508835, 'timestamp': '2025-09-30 22:19:50.864654', 'step': 6473, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:50.896913', 'step': 6473, 'epoch': 1} {'type': 'loss', 'content': 0.0996970385313034, 'timestamp': '2025-09-30 22:19:50.899750', 'step': 6474, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:50.930746', 'step': 6474, 'epoch': 1} {'type': 'loss', 'content': 0.20915302634239197, 'timestamp': '2025-09-30 22:19:50.934059', 'step': 6475, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:50.972965', 'step': 6475, 'epoch': 1} {'type': 'loss', 'content': 0.11290036886930466, 'timestamp': '2025-09-30 22:19:50.996886', 'step': 6476, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.031154', 'step': 6476, 'epoch': 1} {'type': 'loss', 'content': 0.1303936243057251, 'timestamp': '2025-09-30 22:19:51.037512', 'step': 6477, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.071485', 'step': 6477, 'epoch': 1} {'type': 'loss', 'content': 0.14639398455619812, 'timestamp': '2025-09-30 22:19:51.076327', 'step': 6478, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.109381', 'step': 6478, 'epoch': 1} {'type': 'loss', 'content': 0.19498714804649353, 'timestamp': '2025-09-30 22:19:51.114335', 'step': 6479, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.146017', 'step': 6479, 'epoch': 1} {'type': 'loss', 'content': 0.13641275465488434, 'timestamp': '2025-09-30 22:19:51.174681', 'step': 6480, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.205739', 'step': 6480, 'epoch': 1} {'type': 'loss', 'content': 0.17806732654571533, 'timestamp': '2025-09-30 22:19:51.208174', 'step': 6481, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.240832', 'step': 6481, 'epoch': 1} {'type': 'loss', 'content': 0.14910508692264557, 'timestamp': '2025-09-30 22:19:51.243793', 'step': 6482, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:51.275632', 'step': 6482, 'epoch': 1} {'type': 'loss', 'content': 0.14592307806015015, 'timestamp': '2025-09-30 22:19:51.279658', 'step': 6483, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.312544', 'step': 6483, 'epoch': 1} {'type': 'loss', 'content': 0.1177348792552948, 'timestamp': '2025-09-30 22:19:51.338645', 'step': 6484, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.368895', 'step': 6484, 'epoch': 1} {'type': 'loss', 'content': 0.19361062347888947, 'timestamp': '2025-09-30 22:19:51.373458', 'step': 6485, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:19:51.406490', 'step': 6485, 'epoch': 1} {'type': 'loss', 'content': 0.12535350024700165, 'timestamp': '2025-09-30 22:19:51.412147', 'step': 6486, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.445491', 'step': 6486, 'epoch': 1} {'type': 'loss', 'content': 0.16370435059070587, 'timestamp': '2025-09-30 22:19:51.447946', 'step': 6487, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:19:51.479324', 'step': 6487, 'epoch': 1} {'type': 'loss', 'content': 0.11926326900720596, 'timestamp': '2025-09-30 22:19:51.503832', 'step': 6488, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:51.535332', 'step': 6488, 'epoch': 1} {'type': 'loss', 'content': 0.15018898248672485, 'timestamp': '2025-09-30 22:19:51.541500', 'step': 6489, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.576602', 'step': 6489, 'epoch': 1} {'type': 'loss', 'content': 0.12540777027606964, 'timestamp': '2025-09-30 22:19:51.583977', 'step': 6490, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.615724', 'step': 6490, 'epoch': 1} {'type': 'loss', 'content': 0.1331394612789154, 'timestamp': '2025-09-30 22:19:51.619668', 'step': 6491, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:51.651542', 'step': 6491, 'epoch': 1} {'type': 'loss', 'content': 0.1299712061882019, 'timestamp': '2025-09-30 22:19:51.676991', 'step': 6492, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.712937', 'step': 6492, 'epoch': 1} {'type': 'loss', 'content': 0.1850869357585907, 'timestamp': '2025-09-30 22:19:51.714856', 'step': 6493, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.745385', 'step': 6493, 'epoch': 1} {'type': 'loss', 'content': 0.1523662954568863, 'timestamp': '2025-09-30 22:19:51.748142', 'step': 6494, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:51.779533', 'step': 6494, 'epoch': 1} {'type': 'loss', 'content': 0.15834428369998932, 'timestamp': '2025-09-30 22:19:51.785044', 'step': 6495, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:51.817164', 'step': 6495, 'epoch': 1} {'type': 'loss', 'content': 0.14858931303024292, 'timestamp': '2025-09-30 22:19:51.841780', 'step': 6496, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.871388', 'step': 6496, 'epoch': 1} {'type': 'loss', 'content': 0.17348720133304596, 'timestamp': '2025-09-30 22:19:51.876931', 'step': 6497, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:51.908748', 'step': 6497, 'epoch': 1} {'type': 'loss', 'content': 0.1712525486946106, 'timestamp': '2025-09-30 22:19:51.913077', 'step': 6498, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:51.945102', 'step': 6498, 'epoch': 1} {'type': 'loss', 'content': 0.20568260550498962, 'timestamp': '2025-09-30 22:19:51.949052', 'step': 6499, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:51.981088', 'step': 6499, 'epoch': 1} {'type': 'loss', 'content': 0.26859989762306213, 'timestamp': '2025-09-30 22:19:52.006293', 'step': 6500, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 6500', 'timestamp': '2025-09-30 22:19:57.383652', 'step': 6500, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:57.415269', 'step': 6500, 'epoch': 1} {'type': 'loss', 'content': 0.212266743183136, 'timestamp': '2025-09-30 22:19:57.419225', 'step': 6501, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:19:57.453039', 'step': 6501, 'epoch': 1} {'type': 'loss', 'content': 0.18660463392734528, 'timestamp': '2025-09-30 22:19:57.458242', 'step': 6502, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.492701', 'step': 6502, 'epoch': 1} {'type': 'loss', 'content': 0.175818532705307, 'timestamp': '2025-09-30 22:19:57.495132', 'step': 6503, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:57.526673', 'step': 6503, 'epoch': 1} {'type': 'loss', 'content': 0.19719190895557404, 'timestamp': '2025-09-30 22:19:57.552117', 'step': 6504, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.584223', 'step': 6504, 'epoch': 1} {'type': 'loss', 'content': 0.13717417418956757, 'timestamp': '2025-09-30 22:19:57.586581', 'step': 6505, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.619021', 'step': 6505, 'epoch': 1} {'type': 'loss', 'content': 0.0871753990650177, 'timestamp': '2025-09-30 22:19:57.622316', 'step': 6506, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:57.653784', 'step': 6506, 'epoch': 1} {'type': 'loss', 'content': 0.14723169803619385, 'timestamp': '2025-09-30 22:19:57.658217', 'step': 6507, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:57.690196', 'step': 6507, 'epoch': 1} {'type': 'loss', 'content': 0.16796095669269562, 'timestamp': '2025-09-30 22:19:57.717247', 'step': 6508, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:57.750105', 'step': 6508, 'epoch': 1} {'type': 'loss', 'content': 0.17274250090122223, 'timestamp': '2025-09-30 22:19:57.753012', 'step': 6509, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.790392', 'step': 6509, 'epoch': 1} {'type': 'loss', 'content': 0.11179421097040176, 'timestamp': '2025-09-30 22:19:57.792368', 'step': 6510, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.823382', 'step': 6510, 'epoch': 1} {'type': 'loss', 'content': 0.1371268332004547, 'timestamp': '2025-09-30 22:19:57.826646', 'step': 6511, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.859407', 'step': 6511, 'epoch': 1} {'type': 'loss', 'content': 0.1985928863286972, 'timestamp': '2025-09-30 22:19:57.883310', 'step': 6512, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:57.914157', 'step': 6512, 'epoch': 1} {'type': 'loss', 'content': 0.10064946115016937, 'timestamp': '2025-09-30 22:19:57.926590', 'step': 6513, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:57.957964', 'step': 6513, 'epoch': 1} {'type': 'loss', 'content': 0.17101214826107025, 'timestamp': '2025-09-30 22:19:57.970151', 'step': 6514, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:58.012296', 'step': 6514, 'epoch': 1} {'type': 'loss', 'content': 0.14549969136714935, 'timestamp': '2025-09-30 22:19:58.018065', 'step': 6515, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:58.049926', 'step': 6515, 'epoch': 1} {'type': 'loss', 'content': 0.19154077768325806, 'timestamp': '2025-09-30 22:19:58.080924', 'step': 6516, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:58.112392', 'step': 6516, 'epoch': 1} {'type': 'loss', 'content': 0.10885805636644363, 'timestamp': '2025-09-30 22:19:58.115944', 'step': 6517, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:58.147168', 'step': 6517, 'epoch': 1} {'type': 'loss', 'content': 0.1522047519683838, 'timestamp': '2025-09-30 22:19:58.172795', 'step': 6518, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:58.211785', 'step': 6518, 'epoch': 1} {'type': 'loss', 'content': 0.18145537376403809, 'timestamp': '2025-09-30 22:19:58.214543', 'step': 6519, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:58.255795', 'step': 6519, 'epoch': 1} {'type': 'loss', 'content': 0.12201748043298721, 'timestamp': '2025-09-30 22:19:58.291885', 'step': 6520, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:58.334888', 'step': 6520, 'epoch': 1} {'type': 'loss', 'content': 0.10684576630592346, 'timestamp': '2025-09-30 22:19:58.362378', 'step': 6521, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:19:58.401667', 'step': 6521, 'epoch': 1} {'type': 'loss', 'content': 0.17395418882369995, 'timestamp': '2025-09-30 22:19:58.406942', 'step': 6522, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:19:58.440607', 'step': 6522, 'epoch': 1} {'type': 'loss', 'content': 0.11506753414869308, 'timestamp': '2025-09-30 22:19:58.452287', 'step': 6523, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:19:58.489690', 'step': 6523, 'epoch': 1} {'type': 'loss', 'content': 0.1657397598028183, 'timestamp': '2025-09-30 22:19:58.518669', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:20:06.166404', 'step': 6524, 'epoch': 1} {'type': 'pplx', 'content': 8921.953566047343, 'timestamp': '2025-09-30 22:20:06.177467', 'step': 6524, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.215758', 'step': 6524, 'epoch': 1} {'type': 'loss', 'content': 0.1057070940732956, 'timestamp': '2025-09-30 22:20:06.225234', 'step': 6525, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.266670', 'step': 6525, 'epoch': 1} {'type': 'loss', 'content': 0.08270192891359329, 'timestamp': '2025-09-30 22:20:06.274768', 'step': 6526, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:06.313413', 'step': 6526, 'epoch': 1} {'type': 'loss', 'content': 0.19150839745998383, 'timestamp': '2025-09-30 22:20:06.320441', 'step': 6527, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.355957', 'step': 6527, 'epoch': 1} {'type': 'loss', 'content': 0.10330509394407272, 'timestamp': '2025-09-30 22:20:06.390910', 'step': 6528, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:06.422654', 'step': 6528, 'epoch': 1} {'type': 'loss', 'content': 0.14554838836193085, 'timestamp': '2025-09-30 22:20:06.428580', 'step': 6529, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.461029', 'step': 6529, 'epoch': 1} {'type': 'loss', 'content': 0.25670015811920166, 'timestamp': '2025-09-30 22:20:06.471122', 'step': 6530, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:06.506625', 'step': 6530, 'epoch': 1} {'type': 'loss', 'content': 0.1629338264465332, 'timestamp': '2025-09-30 22:20:06.509264', 'step': 6531, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:06.541163', 'step': 6531, 'epoch': 1} {'type': 'loss', 'content': 0.11799664050340652, 'timestamp': '2025-09-30 22:20:06.570940', 'step': 6532, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:06.603098', 'step': 6532, 'epoch': 1} {'type': 'loss', 'content': 0.10280611366033554, 'timestamp': '2025-09-30 22:20:06.605676', 'step': 6533, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.642005', 'step': 6533, 'epoch': 1} {'type': 'loss', 'content': 0.14794816076755524, 'timestamp': '2025-09-30 22:20:06.649117', 'step': 6534, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:06.685629', 'step': 6534, 'epoch': 1} {'type': 'loss', 'content': 0.09008286893367767, 'timestamp': '2025-09-30 22:20:06.688448', 'step': 6535, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.722636', 'step': 6535, 'epoch': 1} {'type': 'loss', 'content': 0.12726275622844696, 'timestamp': '2025-09-30 22:20:06.751023', 'step': 6536, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:06.784437', 'step': 6536, 'epoch': 1} {'type': 'loss', 'content': 0.059680476784706116, 'timestamp': '2025-09-30 22:20:06.790011', 'step': 6537, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.825334', 'step': 6537, 'epoch': 1} {'type': 'loss', 'content': 0.17486906051635742, 'timestamp': '2025-09-30 22:20:06.827995', 'step': 6538, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.860171', 'step': 6538, 'epoch': 1} {'type': 'loss', 'content': 0.1711418330669403, 'timestamp': '2025-09-30 22:20:06.869994', 'step': 6539, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.906854', 'step': 6539, 'epoch': 1} {'type': 'loss', 'content': 0.160882368683815, 'timestamp': '2025-09-30 22:20:06.937702', 'step': 6540, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:06.970997', 'step': 6540, 'epoch': 1} {'type': 'loss', 'content': 0.1462123543024063, 'timestamp': '2025-09-30 22:20:06.974797', 'step': 6541, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:07.006625', 'step': 6541, 'epoch': 1} {'type': 'loss', 'content': 0.12735697627067566, 'timestamp': '2025-09-30 22:20:07.015157', 'step': 6542, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:07.053169', 'step': 6542, 'epoch': 1} {'type': 'loss', 'content': 0.18855063617229462, 'timestamp': '2025-09-30 22:20:07.061504', 'step': 6543, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:07.101345', 'step': 6543, 'epoch': 1} {'type': 'loss', 'content': 0.16060277819633484, 'timestamp': '2025-09-30 22:20:07.127177', 'step': 6544, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:07.159167', 'step': 6544, 'epoch': 1} {'type': 'loss', 'content': 0.08730395883321762, 'timestamp': '2025-09-30 22:20:07.162324', 'step': 6545, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:07.195081', 'step': 6545, 'epoch': 1} {'type': 'loss', 'content': 0.12740878760814667, 'timestamp': '2025-09-30 22:20:07.198451', 'step': 6546, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:07.238714', 'step': 6546, 'epoch': 1} {'type': 'loss', 'content': 0.059070464223623276, 'timestamp': '2025-09-30 22:20:07.242910', 'step': 6547, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:07.274807', 'step': 6547, 'epoch': 1} {'type': 'loss', 'content': 0.11493650078773499, 'timestamp': '2025-09-30 22:20:07.311429', 'step': 6548, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:07.344000', 'step': 6548, 'epoch': 1} {'type': 'loss', 'content': 0.19784173369407654, 'timestamp': '2025-09-30 22:20:07.349360', 'step': 6549, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:07.394418', 'step': 6549, 'epoch': 1} {'type': 'loss', 'content': 0.1976039707660675, 'timestamp': '2025-09-30 22:20:07.406975', 'step': 6550, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:07.444664', 'step': 6550, 'epoch': 1} {'type': 'loss', 'content': 0.19669149816036224, 'timestamp': '2025-09-30 22:20:07.462546', 'step': 6551, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:07.494024', 'step': 6551, 'epoch': 1} {'type': 'loss', 'content': 0.1528627574443817, 'timestamp': '2025-09-30 22:20:07.518797', 'step': 6552, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:07.558338', 'step': 6552, 'epoch': 1} {'type': 'loss', 'content': 0.18791216611862183, 'timestamp': '2025-09-30 22:20:07.561329', 'step': 6553, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:07.603208', 'step': 6553, 'epoch': 1} {'type': 'loss', 'content': 0.04434913769364357, 'timestamp': '2025-09-30 22:20:07.614636', 'step': 6554, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:07.662748', 'step': 6554, 'epoch': 1} {'type': 'loss', 'content': 0.1594502180814743, 'timestamp': '2025-09-30 22:20:07.668405', 'step': 6555, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:07.710444', 'step': 6555, 'epoch': 1} {'type': 'loss', 'content': 0.1979198157787323, 'timestamp': '2025-09-30 22:20:07.736008', 'step': 6556, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:07.775346', 'step': 6556, 'epoch': 1} {'type': 'loss', 'content': 0.1268458068370819, 'timestamp': '2025-09-30 22:20:07.780593', 'step': 6557, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:07.815865', 'step': 6557, 'epoch': 1} {'type': 'loss', 'content': 0.13463298976421356, 'timestamp': '2025-09-30 22:20:07.825257', 'step': 6558, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:07.861845', 'step': 6558, 'epoch': 1} {'type': 'loss', 'content': 0.22432518005371094, 'timestamp': '2025-09-30 22:20:07.865824', 'step': 6559, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:07.901576', 'step': 6559, 'epoch': 1} {'type': 'loss', 'content': 0.1633538454771042, 'timestamp': '2025-09-30 22:20:07.927720', 'step': 6560, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:07.963858', 'step': 6560, 'epoch': 1} {'type': 'loss', 'content': 0.1378590315580368, 'timestamp': '2025-09-30 22:20:07.974006', 'step': 6561, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.007417', 'step': 6561, 'epoch': 1} {'type': 'loss', 'content': 0.18469364941120148, 'timestamp': '2025-09-30 22:20:08.016428', 'step': 6562, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.058106', 'step': 6562, 'epoch': 1} {'type': 'loss', 'content': 0.2055315524339676, 'timestamp': '2025-09-30 22:20:08.070555', 'step': 6563, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:08.114622', 'step': 6563, 'epoch': 1} {'type': 'loss', 'content': 0.2437816709280014, 'timestamp': '2025-09-30 22:20:08.140067', 'step': 6564, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:08.171877', 'step': 6564, 'epoch': 1} {'type': 'loss', 'content': 0.11494027823209763, 'timestamp': '2025-09-30 22:20:08.176659', 'step': 6565, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.208380', 'step': 6565, 'epoch': 1} {'type': 'loss', 'content': 0.14146506786346436, 'timestamp': '2025-09-30 22:20:08.216622', 'step': 6566, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.255090', 'step': 6566, 'epoch': 1} {'type': 'loss', 'content': 0.1405462771654129, 'timestamp': '2025-09-30 22:20:08.259847', 'step': 6567, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.300523', 'step': 6567, 'epoch': 1} {'type': 'loss', 'content': 0.14074990153312683, 'timestamp': '2025-09-30 22:20:08.337733', 'step': 6568, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.369944', 'step': 6568, 'epoch': 1} {'type': 'loss', 'content': 0.1368180513381958, 'timestamp': '2025-09-30 22:20:08.373393', 'step': 6569, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:08.406909', 'step': 6569, 'epoch': 1} {'type': 'loss', 'content': 0.2639935314655304, 'timestamp': '2025-09-30 22:20:08.411770', 'step': 6570, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:08.444229', 'step': 6570, 'epoch': 1} {'type': 'loss', 'content': 0.14975742995738983, 'timestamp': '2025-09-30 22:20:08.451042', 'step': 6571, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:08.486128', 'step': 6571, 'epoch': 1} {'type': 'loss', 'content': 0.14441509544849396, 'timestamp': '2025-09-30 22:20:08.517705', 'step': 6572, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.560158', 'step': 6572, 'epoch': 1} {'type': 'loss', 'content': 0.10417866706848145, 'timestamp': '2025-09-30 22:20:08.562790', 'step': 6573, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:08.593034', 'step': 6573, 'epoch': 1} {'type': 'loss', 'content': 0.12648765742778778, 'timestamp': '2025-09-30 22:20:08.602288', 'step': 6574, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.638477', 'step': 6574, 'epoch': 1} {'type': 'loss', 'content': 0.19058731198310852, 'timestamp': '2025-09-30 22:20:08.647237', 'step': 6575, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:08.678645', 'step': 6575, 'epoch': 1} {'type': 'loss', 'content': 0.10629051178693771, 'timestamp': '2025-09-30 22:20:08.703167', 'step': 6576, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:08.733290', 'step': 6576, 'epoch': 1} {'type': 'loss', 'content': 0.11200199276208878, 'timestamp': '2025-09-30 22:20:08.737147', 'step': 6577, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.770223', 'step': 6577, 'epoch': 1} {'type': 'loss', 'content': 0.14680209755897522, 'timestamp': '2025-09-30 22:20:08.772853', 'step': 6578, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:08.804319', 'step': 6578, 'epoch': 1} {'type': 'loss', 'content': 0.1266738623380661, 'timestamp': '2025-09-30 22:20:08.807840', 'step': 6579, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:08.838888', 'step': 6579, 'epoch': 1} {'type': 'loss', 'content': 0.1953824907541275, 'timestamp': '2025-09-30 22:20:08.869055', 'step': 6580, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:08.900516', 'step': 6580, 'epoch': 1} {'type': 'loss', 'content': 0.16114501655101776, 'timestamp': '2025-09-30 22:20:08.903883', 'step': 6581, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:08.938852', 'step': 6581, 'epoch': 1} {'type': 'loss', 'content': 0.12529954314231873, 'timestamp': '2025-09-30 22:20:08.941988', 'step': 6582, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:08.978977', 'step': 6582, 'epoch': 1} {'type': 'loss', 'content': 0.11463402211666107, 'timestamp': '2025-09-30 22:20:08.982180', 'step': 6583, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:09.014920', 'step': 6583, 'epoch': 1} {'type': 'loss', 'content': 0.17838594317436218, 'timestamp': '2025-09-30 22:20:09.041271', 'step': 6584, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:09.072842', 'step': 6584, 'epoch': 1} {'type': 'loss', 'content': 0.23366966843605042, 'timestamp': '2025-09-30 22:20:09.077375', 'step': 6585, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:09.116871', 'step': 6585, 'epoch': 1} {'type': 'loss', 'content': 0.08991654962301254, 'timestamp': '2025-09-30 22:20:09.120544', 'step': 6586, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:09.151274', 'step': 6586, 'epoch': 1} {'type': 'loss', 'content': 0.13251833617687225, 'timestamp': '2025-09-30 22:20:09.160087', 'step': 6587, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:09.201661', 'step': 6587, 'epoch': 1} {'type': 'loss', 'content': 0.16990281641483307, 'timestamp': '2025-09-30 22:20:09.225515', 'step': 6588, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:09.257288', 'step': 6588, 'epoch': 1} {'type': 'loss', 'content': 0.15341269969940186, 'timestamp': '2025-09-30 22:20:09.266952', 'step': 6589, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:09.299617', 'step': 6589, 'epoch': 1} {'type': 'loss', 'content': 0.12653951346874237, 'timestamp': '2025-09-30 22:20:09.304052', 'step': 6590, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:09.340164', 'step': 6590, 'epoch': 1} {'type': 'loss', 'content': 0.09093127399682999, 'timestamp': '2025-09-30 22:20:09.349764', 'step': 6591, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:09.389271', 'step': 6591, 'epoch': 1} {'type': 'loss', 'content': 0.23272407054901123, 'timestamp': '2025-09-30 22:20:09.415812', 'step': 6592, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:09.446600', 'step': 6592, 'epoch': 1} {'type': 'loss', 'content': 0.10539402812719345, 'timestamp': '2025-09-30 22:20:09.450700', 'step': 6593, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:09.488920', 'step': 6593, 'epoch': 1} {'type': 'loss', 'content': 0.15645618736743927, 'timestamp': '2025-09-30 22:20:09.498850', 'step': 6594, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:09.530367', 'step': 6594, 'epoch': 1} {'type': 'loss', 'content': 0.11448593437671661, 'timestamp': '2025-09-30 22:20:09.549253', 'step': 6595, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:09.589314', 'step': 6595, 'epoch': 1} {'type': 'loss', 'content': 0.12441445142030716, 'timestamp': '2025-09-30 22:20:09.614155', 'step': 6596, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:09.645794', 'step': 6596, 'epoch': 1} {'type': 'loss', 'content': 0.19315767288208008, 'timestamp': '2025-09-30 22:20:09.656370', 'step': 6597, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:09.690795', 'step': 6597, 'epoch': 1} {'type': 'loss', 'content': 0.07377762347459793, 'timestamp': '2025-09-30 22:20:09.696813', 'step': 6598, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:09.733664', 'step': 6598, 'epoch': 1} {'type': 'loss', 'content': 0.08505811542272568, 'timestamp': '2025-09-30 22:20:09.737744', 'step': 6599, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:09.773383', 'step': 6599, 'epoch': 1} {'type': 'loss', 'content': 0.09398719668388367, 'timestamp': '2025-09-30 22:20:09.800227', 'step': 6600, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:09.837295', 'step': 6600, 'epoch': 1} {'type': 'loss', 'content': 0.10473071783781052, 'timestamp': '2025-09-30 22:20:09.840556', 'step': 6601, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:09.873134', 'step': 6601, 'epoch': 1} {'type': 'loss', 'content': 0.1029803454875946, 'timestamp': '2025-09-30 22:20:09.880738', 'step': 6602, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:09.920044', 'step': 6602, 'epoch': 1} {'type': 'loss', 'content': 0.17785777151584625, 'timestamp': '2025-09-30 22:20:09.930923', 'step': 6603, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:09.970170', 'step': 6603, 'epoch': 1} {'type': 'loss', 'content': 0.27522900700569153, 'timestamp': '2025-09-30 22:20:09.996789', 'step': 6604, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:10.031706', 'step': 6604, 'epoch': 1} {'type': 'loss', 'content': 0.08181918412446976, 'timestamp': '2025-09-30 22:20:10.038034', 'step': 6605, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.069624', 'step': 6605, 'epoch': 1} {'type': 'loss', 'content': 0.12103845179080963, 'timestamp': '2025-09-30 22:20:10.075638', 'step': 6606, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:10.122693', 'step': 6606, 'epoch': 1} {'type': 'loss', 'content': 0.10419808328151703, 'timestamp': '2025-09-30 22:20:10.137064', 'step': 6607, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.181680', 'step': 6607, 'epoch': 1} {'type': 'loss', 'content': 0.1344982236623764, 'timestamp': '2025-09-30 22:20:10.218258', 'step': 6608, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:10.250263', 'step': 6608, 'epoch': 1} {'type': 'loss', 'content': 0.17211231589317322, 'timestamp': '2025-09-30 22:20:10.260762', 'step': 6609, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:10.307551', 'step': 6609, 'epoch': 1} {'type': 'loss', 'content': 0.23559080064296722, 'timestamp': '2025-09-30 22:20:10.311168', 'step': 6610, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:10.347105', 'step': 6610, 'epoch': 1} {'type': 'loss', 'content': 0.1721581518650055, 'timestamp': '2025-09-30 22:20:10.359415', 'step': 6611, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.391056', 'step': 6611, 'epoch': 1} {'type': 'loss', 'content': 0.1331813633441925, 'timestamp': '2025-09-30 22:20:10.420871', 'step': 6612, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.460058', 'step': 6612, 'epoch': 1} {'type': 'loss', 'content': 0.163241907954216, 'timestamp': '2025-09-30 22:20:10.470304', 'step': 6613, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.509219', 'step': 6613, 'epoch': 1} {'type': 'loss', 'content': 0.1632031500339508, 'timestamp': '2025-09-30 22:20:10.519048', 'step': 6614, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.558077', 'step': 6614, 'epoch': 1} {'type': 'loss', 'content': 0.08764788508415222, 'timestamp': '2025-09-30 22:20:10.561832', 'step': 6615, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.600522', 'step': 6615, 'epoch': 1} {'type': 'loss', 'content': 0.18374750018119812, 'timestamp': '2025-09-30 22:20:10.632605', 'step': 6616, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:10.671358', 'step': 6616, 'epoch': 1} {'type': 'loss', 'content': 0.11059752851724625, 'timestamp': '2025-09-30 22:20:10.678915', 'step': 6617, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:10.718172', 'step': 6617, 'epoch': 1} {'type': 'loss', 'content': 0.08896244317293167, 'timestamp': '2025-09-30 22:20:10.721101', 'step': 6618, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.762690', 'step': 6618, 'epoch': 1} {'type': 'loss', 'content': 0.1006835475564003, 'timestamp': '2025-09-30 22:20:10.771371', 'step': 6619, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.808960', 'step': 6619, 'epoch': 1} {'type': 'loss', 'content': 0.11388977617025375, 'timestamp': '2025-09-30 22:20:10.833254', 'step': 6620, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:10.864062', 'step': 6620, 'epoch': 1} {'type': 'loss', 'content': 0.14488978683948517, 'timestamp': '2025-09-30 22:20:10.874407', 'step': 6621, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:10.906888', 'step': 6621, 'epoch': 1} {'type': 'loss', 'content': 0.12533807754516602, 'timestamp': '2025-09-30 22:20:10.911725', 'step': 6622, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.948724', 'step': 6622, 'epoch': 1} {'type': 'loss', 'content': 0.16222721338272095, 'timestamp': '2025-09-30 22:20:10.960273', 'step': 6623, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:10.996716', 'step': 6623, 'epoch': 1} {'type': 'loss', 'content': 0.11641961336135864, 'timestamp': '2025-09-30 22:20:11.021219', 'step': 6624, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:11.056045', 'step': 6624, 'epoch': 1} {'type': 'loss', 'content': 0.16969943046569824, 'timestamp': '2025-09-30 22:20:11.064591', 'step': 6625, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.101964', 'step': 6625, 'epoch': 1} {'type': 'loss', 'content': 0.15016254782676697, 'timestamp': '2025-09-30 22:20:11.112806', 'step': 6626, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:11.145889', 'step': 6626, 'epoch': 1} {'type': 'loss', 'content': 0.08349163830280304, 'timestamp': '2025-09-30 22:20:11.153137', 'step': 6627, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:11.194812', 'step': 6627, 'epoch': 1} {'type': 'loss', 'content': 0.17196449637413025, 'timestamp': '2025-09-30 22:20:11.232106', 'step': 6628, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.275711', 'step': 6628, 'epoch': 1} {'type': 'loss', 'content': 0.20461440086364746, 'timestamp': '2025-09-30 22:20:11.280736', 'step': 6629, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.314085', 'step': 6629, 'epoch': 1} {'type': 'loss', 'content': 0.11687320470809937, 'timestamp': '2025-09-30 22:20:11.321850', 'step': 6630, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:11.357735', 'step': 6630, 'epoch': 1} {'type': 'loss', 'content': 0.21547208726406097, 'timestamp': '2025-09-30 22:20:11.361787', 'step': 6631, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.393851', 'step': 6631, 'epoch': 1} {'type': 'loss', 'content': 0.11087117344141006, 'timestamp': '2025-09-30 22:20:11.419622', 'step': 6632, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:11.449736', 'step': 6632, 'epoch': 1} {'type': 'loss', 'content': 0.1421733796596527, 'timestamp': '2025-09-30 22:20:11.455881', 'step': 6633, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:11.493325', 'step': 6633, 'epoch': 1} {'type': 'loss', 'content': 0.1419723927974701, 'timestamp': '2025-09-30 22:20:11.498908', 'step': 6634, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.530348', 'step': 6634, 'epoch': 1} {'type': 'loss', 'content': 0.2147253006696701, 'timestamp': '2025-09-30 22:20:11.533904', 'step': 6635, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.573917', 'step': 6635, 'epoch': 1} {'type': 'loss', 'content': 0.1485115885734558, 'timestamp': '2025-09-30 22:20:11.605613', 'step': 6636, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:11.643057', 'step': 6636, 'epoch': 1} {'type': 'loss', 'content': 0.045559920370578766, 'timestamp': '2025-09-30 22:20:11.653236', 'step': 6637, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:11.691485', 'step': 6637, 'epoch': 1} {'type': 'loss', 'content': 0.11175119876861572, 'timestamp': '2025-09-30 22:20:11.694104', 'step': 6638, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:11.732960', 'step': 6638, 'epoch': 1} {'type': 'loss', 'content': 0.16119170188903809, 'timestamp': '2025-09-30 22:20:11.739805', 'step': 6639, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:11.772060', 'step': 6639, 'epoch': 1} {'type': 'loss', 'content': 0.14885559678077698, 'timestamp': '2025-09-30 22:20:11.797291', 'step': 6640, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:11.830499', 'step': 6640, 'epoch': 1} {'type': 'loss', 'content': 0.2622135579586029, 'timestamp': '2025-09-30 22:20:11.839648', 'step': 6641, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.872325', 'step': 6641, 'epoch': 1} {'type': 'loss', 'content': 0.1197562888264656, 'timestamp': '2025-09-30 22:20:11.878780', 'step': 6642, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:11.918781', 'step': 6642, 'epoch': 1} {'type': 'loss', 'content': 0.22722704708576202, 'timestamp': '2025-09-30 22:20:11.929555', 'step': 6643, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:11.961070', 'step': 6643, 'epoch': 1} {'type': 'loss', 'content': 0.1554647535085678, 'timestamp': '2025-09-30 22:20:11.984856', 'step': 6644, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.020926', 'step': 6644, 'epoch': 1} {'type': 'loss', 'content': 0.0970102846622467, 'timestamp': '2025-09-30 22:20:12.024075', 'step': 6645, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.060184', 'step': 6645, 'epoch': 1} {'type': 'loss', 'content': 0.10711076110601425, 'timestamp': '2025-09-30 22:20:12.062500', 'step': 6646, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:12.097260', 'step': 6646, 'epoch': 1} {'type': 'loss', 'content': 0.11616508662700653, 'timestamp': '2025-09-30 22:20:12.104726', 'step': 6647, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.141062', 'step': 6647, 'epoch': 1} {'type': 'loss', 'content': 0.09144264459609985, 'timestamp': '2025-09-30 22:20:12.170849', 'step': 6648, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:12.209048', 'step': 6648, 'epoch': 1} {'type': 'loss', 'content': 0.24283276498317719, 'timestamp': '2025-09-30 22:20:12.212366', 'step': 6649, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:12.243701', 'step': 6649, 'epoch': 1} {'type': 'loss', 'content': 0.2067667841911316, 'timestamp': '2025-09-30 22:20:12.246923', 'step': 6650, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.282629', 'step': 6650, 'epoch': 1} {'type': 'loss', 'content': 0.08597872406244278, 'timestamp': '2025-09-30 22:20:12.292366', 'step': 6651, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:12.331039', 'step': 6651, 'epoch': 1} {'type': 'loss', 'content': 0.21713018417358398, 'timestamp': '2025-09-30 22:20:12.356027', 'step': 6652, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:12.386599', 'step': 6652, 'epoch': 1} {'type': 'loss', 'content': 0.2227483093738556, 'timestamp': '2025-09-30 22:20:12.392197', 'step': 6653, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:12.430144', 'step': 6653, 'epoch': 1} {'type': 'loss', 'content': 0.11344192177057266, 'timestamp': '2025-09-30 22:20:12.434114', 'step': 6654, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.466739', 'step': 6654, 'epoch': 1} {'type': 'loss', 'content': 0.17290794849395752, 'timestamp': '2025-09-30 22:20:12.472988', 'step': 6655, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:12.505062', 'step': 6655, 'epoch': 1} {'type': 'loss', 'content': 0.1762993037700653, 'timestamp': '2025-09-30 22:20:12.530231', 'step': 6656, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:12.567380', 'step': 6656, 'epoch': 1} {'type': 'loss', 'content': 0.13327033817768097, 'timestamp': '2025-09-30 22:20:12.571207', 'step': 6657, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:12.606587', 'step': 6657, 'epoch': 1} {'type': 'loss', 'content': 0.20712602138519287, 'timestamp': '2025-09-30 22:20:12.609540', 'step': 6658, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.645813', 'step': 6658, 'epoch': 1} {'type': 'loss', 'content': 0.09827881306409836, 'timestamp': '2025-09-30 22:20:12.652225', 'step': 6659, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:12.689278', 'step': 6659, 'epoch': 1} {'type': 'loss', 'content': 0.1886090338230133, 'timestamp': '2025-09-30 22:20:12.712840', 'step': 6660, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.749894', 'step': 6660, 'epoch': 1} {'type': 'loss', 'content': 0.05351091921329498, 'timestamp': '2025-09-30 22:20:12.755340', 'step': 6661, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:12.792125', 'step': 6661, 'epoch': 1} {'type': 'loss', 'content': 0.11473312973976135, 'timestamp': '2025-09-30 22:20:12.797522', 'step': 6662, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.831998', 'step': 6662, 'epoch': 1} {'type': 'loss', 'content': 0.10910852253437042, 'timestamp': '2025-09-30 22:20:12.835044', 'step': 6663, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:12.867111', 'step': 6663, 'epoch': 1} {'type': 'loss', 'content': 0.145111545920372, 'timestamp': '2025-09-30 22:20:12.893343', 'step': 6664, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:12.930326', 'step': 6664, 'epoch': 1} {'type': 'loss', 'content': 0.17001944780349731, 'timestamp': '2025-09-30 22:20:12.933470', 'step': 6665, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:12.964670', 'step': 6665, 'epoch': 1} {'type': 'loss', 'content': 0.08929920941591263, 'timestamp': '2025-09-30 22:20:12.970361', 'step': 6666, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.001817', 'step': 6666, 'epoch': 1} {'type': 'loss', 'content': 0.33054810762405396, 'timestamp': '2025-09-30 22:20:13.005666', 'step': 6667, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.035995', 'step': 6667, 'epoch': 1} {'type': 'loss', 'content': 0.17088235914707184, 'timestamp': '2025-09-30 22:20:13.065402', 'step': 6668, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.101189', 'step': 6668, 'epoch': 1} {'type': 'loss', 'content': 0.12750886380672455, 'timestamp': '2025-09-30 22:20:13.104656', 'step': 6669, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.136013', 'step': 6669, 'epoch': 1} {'type': 'loss', 'content': 0.09258880466222763, 'timestamp': '2025-09-30 22:20:13.140922', 'step': 6670, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:13.171502', 'step': 6670, 'epoch': 1} {'type': 'loss', 'content': 0.04268855229020119, 'timestamp': '2025-09-30 22:20:13.179932', 'step': 6671, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.216124', 'step': 6671, 'epoch': 1} {'type': 'loss', 'content': 0.10938090085983276, 'timestamp': '2025-09-30 22:20:13.244441', 'step': 6672, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.283986', 'step': 6672, 'epoch': 1} {'type': 'loss', 'content': 0.20936226844787598, 'timestamp': '2025-09-30 22:20:13.286885', 'step': 6673, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.329035', 'step': 6673, 'epoch': 1} {'type': 'loss', 'content': 0.20442698895931244, 'timestamp': '2025-09-30 22:20:13.338354', 'step': 6674, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.371615', 'step': 6674, 'epoch': 1} {'type': 'loss', 'content': 0.19800560176372528, 'timestamp': '2025-09-30 22:20:13.374796', 'step': 6675, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.409337', 'step': 6675, 'epoch': 1} {'type': 'loss', 'content': 0.08504921197891235, 'timestamp': '2025-09-30 22:20:13.440265', 'step': 6676, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:13.470986', 'step': 6676, 'epoch': 1} {'type': 'loss', 'content': 0.10987968742847443, 'timestamp': '2025-09-30 22:20:13.477680', 'step': 6677, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.519621', 'step': 6677, 'epoch': 1} {'type': 'loss', 'content': 0.15437644720077515, 'timestamp': '2025-09-30 22:20:13.528156', 'step': 6678, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.563270', 'step': 6678, 'epoch': 1} {'type': 'loss', 'content': 0.22409579157829285, 'timestamp': '2025-09-30 22:20:13.572568', 'step': 6679, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:13.609399', 'step': 6679, 'epoch': 1} {'type': 'loss', 'content': 0.18367841839790344, 'timestamp': '2025-09-30 22:20:13.636025', 'step': 6680, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.672398', 'step': 6680, 'epoch': 1} {'type': 'loss', 'content': 0.07802431285381317, 'timestamp': '2025-09-30 22:20:13.675766', 'step': 6681, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.719317', 'step': 6681, 'epoch': 1} {'type': 'loss', 'content': 0.1623166799545288, 'timestamp': '2025-09-30 22:20:13.721932', 'step': 6682, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:13.769488', 'step': 6682, 'epoch': 1} {'type': 'loss', 'content': 0.22516538202762604, 'timestamp': '2025-09-30 22:20:13.772991', 'step': 6683, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:13.803584', 'step': 6683, 'epoch': 1} {'type': 'loss', 'content': 0.1773226261138916, 'timestamp': '2025-09-30 22:20:13.831950', 'step': 6684, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:13.862015', 'step': 6684, 'epoch': 1} {'type': 'loss', 'content': 0.18559621274471283, 'timestamp': '2025-09-30 22:20:13.868826', 'step': 6685, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.903426', 'step': 6685, 'epoch': 1} {'type': 'loss', 'content': 0.10032419115304947, 'timestamp': '2025-09-30 22:20:13.907070', 'step': 6686, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.938318', 'step': 6686, 'epoch': 1} {'type': 'loss', 'content': 0.13497185707092285, 'timestamp': '2025-09-30 22:20:13.942484', 'step': 6687, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:13.978673', 'step': 6687, 'epoch': 1} {'type': 'loss', 'content': 0.17207111418247223, 'timestamp': '2025-09-30 22:20:14.003303', 'step': 6688, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:14.033645', 'step': 6688, 'epoch': 1} {'type': 'loss', 'content': 0.23750898241996765, 'timestamp': '2025-09-30 22:20:14.040925', 'step': 6689, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:14.076527', 'step': 6689, 'epoch': 1} {'type': 'loss', 'content': 0.20655789971351624, 'timestamp': '2025-09-30 22:20:14.085178', 'step': 6690, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:14.122245', 'step': 6690, 'epoch': 1} {'type': 'loss', 'content': 0.16247747838497162, 'timestamp': '2025-09-30 22:20:14.125887', 'step': 6691, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:14.164873', 'step': 6691, 'epoch': 1} {'type': 'loss', 'content': 0.1277620941400528, 'timestamp': '2025-09-30 22:20:14.189727', 'step': 6692, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:14.224331', 'step': 6692, 'epoch': 1} {'type': 'loss', 'content': 0.19033613801002502, 'timestamp': '2025-09-30 22:20:14.226996', 'step': 6693, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:14.257545', 'step': 6693, 'epoch': 1} {'type': 'loss', 'content': 0.11476631462574005, 'timestamp': '2025-09-30 22:20:14.266656', 'step': 6694, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:14.309890', 'step': 6694, 'epoch': 1} {'type': 'loss', 'content': 0.13134904205799103, 'timestamp': '2025-09-30 22:20:14.322727', 'step': 6695, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:14.354164', 'step': 6695, 'epoch': 1} {'type': 'loss', 'content': 0.12038645148277283, 'timestamp': '2025-09-30 22:20:14.384879', 'step': 6696, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:14.420758', 'step': 6696, 'epoch': 1} {'type': 'loss', 'content': 0.16646704077720642, 'timestamp': '2025-09-30 22:20:14.424196', 'step': 6697, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:14.461582', 'step': 6697, 'epoch': 1} {'type': 'loss', 'content': 0.1845477670431137, 'timestamp': '2025-09-30 22:20:14.464957', 'step': 6698, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:14.495808', 'step': 6698, 'epoch': 1} {'type': 'loss', 'content': 0.18115156888961792, 'timestamp': '2025-09-30 22:20:14.498530', 'step': 6699, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:14.529395', 'step': 6699, 'epoch': 1} {'type': 'loss', 'content': 0.11388842016458511, 'timestamp': '2025-09-30 22:20:14.563296', 'step': 6700, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:14.594371', 'step': 6700, 'epoch': 1} {'type': 'loss', 'content': 0.19443528354167938, 'timestamp': '2025-09-30 22:20:14.599532', 'step': 6701, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:14.638781', 'step': 6701, 'epoch': 1} {'type': 'loss', 'content': 0.23114266991615295, 'timestamp': '2025-09-30 22:20:14.645990', 'step': 6702, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:14.677799', 'step': 6702, 'epoch': 1} {'type': 'loss', 'content': 0.1388145089149475, 'timestamp': '2025-09-30 22:20:14.681403', 'step': 6703, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:14.713749', 'step': 6703, 'epoch': 1} {'type': 'loss', 'content': 0.13992951810359955, 'timestamp': '2025-09-30 22:20:14.748629', 'step': 6704, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:14.790156', 'step': 6704, 'epoch': 1} {'type': 'loss', 'content': 0.17920878529548645, 'timestamp': '2025-09-30 22:20:14.802850', 'step': 6705, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:14.840407', 'step': 6705, 'epoch': 1} {'type': 'loss', 'content': 0.17345675826072693, 'timestamp': '2025-09-30 22:20:14.848220', 'step': 6706, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:14.887005', 'step': 6706, 'epoch': 1} {'type': 'loss', 'content': 0.1490478664636612, 'timestamp': '2025-09-30 22:20:14.892504', 'step': 6707, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:14.943241', 'step': 6707, 'epoch': 1} {'type': 'loss', 'content': 0.13596908748149872, 'timestamp': '2025-09-30 22:20:14.977178', 'step': 6708, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:15.017689', 'step': 6708, 'epoch': 1} {'type': 'loss', 'content': 0.10068807750940323, 'timestamp': '2025-09-30 22:20:15.029329', 'step': 6709, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:15.072891', 'step': 6709, 'epoch': 1} {'type': 'loss', 'content': 0.23788835108280182, 'timestamp': '2025-09-30 22:20:15.080383', 'step': 6710, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:15.111960', 'step': 6710, 'epoch': 1} {'type': 'loss', 'content': 0.23507791757583618, 'timestamp': '2025-09-30 22:20:15.120408', 'step': 6711, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:15.157010', 'step': 6711, 'epoch': 1} {'type': 'loss', 'content': 0.18307799100875854, 'timestamp': '2025-09-30 22:20:15.188281', 'step': 6712, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:15.234139', 'step': 6712, 'epoch': 1} {'type': 'loss', 'content': 0.11929122358560562, 'timestamp': '2025-09-30 22:20:15.247256', 'step': 6713, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:15.286910', 'step': 6713, 'epoch': 1} {'type': 'loss', 'content': 0.1657455414533615, 'timestamp': '2025-09-30 22:20:15.290408', 'step': 6714, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:15.321104', 'step': 6714, 'epoch': 1} {'type': 'loss', 'content': 0.09441978484392166, 'timestamp': '2025-09-30 22:20:15.327513', 'step': 6715, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:15.362873', 'step': 6715, 'epoch': 1} {'type': 'loss', 'content': 0.15812471508979797, 'timestamp': '2025-09-30 22:20:15.393623', 'step': 6716, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:20:15.425178', 'step': 6716, 'epoch': 1} {'type': 'loss', 'content': 0.18261022865772247, 'timestamp': '2025-09-30 22:20:15.432167', 'step': 6717, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:15.467751', 'step': 6717, 'epoch': 1} {'type': 'loss', 'content': 0.13789215683937073, 'timestamp': '2025-09-30 22:20:15.475817', 'step': 6718, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:15.540715', 'step': 6718, 'epoch': 1} {'type': 'loss', 'content': 0.15330293774604797, 'timestamp': '2025-09-30 22:20:15.548006', 'step': 6719, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:15.582615', 'step': 6719, 'epoch': 1} {'type': 'loss', 'content': 0.14851795136928558, 'timestamp': '2025-09-30 22:20:15.606643', 'step': 6720, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:15.647407', 'step': 6720, 'epoch': 1} {'type': 'loss', 'content': 0.20012499392032623, 'timestamp': '2025-09-30 22:20:15.649412', 'step': 6721, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:15.698163', 'step': 6721, 'epoch': 1} {'type': 'loss', 'content': 0.28804296255111694, 'timestamp': '2025-09-30 22:20:15.705739', 'step': 6722, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:15.740842', 'step': 6722, 'epoch': 1} {'type': 'loss', 'content': 0.19133831560611725, 'timestamp': '2025-09-30 22:20:15.743422', 'step': 6723, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:15.781604', 'step': 6723, 'epoch': 1} {'type': 'loss', 'content': 0.10692957788705826, 'timestamp': '2025-09-30 22:20:15.811350', 'step': 6724, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:15.855641', 'step': 6724, 'epoch': 1} {'type': 'loss', 'content': 0.13123854994773865, 'timestamp': '2025-09-30 22:20:15.859314', 'step': 6725, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:15.903785', 'step': 6725, 'epoch': 1} {'type': 'loss', 'content': 0.113420769572258, 'timestamp': '2025-09-30 22:20:15.911869', 'step': 6726, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:15.964195', 'step': 6726, 'epoch': 1} {'type': 'loss', 'content': 0.1079811304807663, 'timestamp': '2025-09-30 22:20:15.967591', 'step': 6727, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:16.008449', 'step': 6727, 'epoch': 1} {'type': 'loss', 'content': 0.09494903683662415, 'timestamp': '2025-09-30 22:20:16.038226', 'step': 6728, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:16.076531', 'step': 6728, 'epoch': 1} {'type': 'loss', 'content': 0.1714828461408615, 'timestamp': '2025-09-30 22:20:16.084864', 'step': 6729, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:16.143331', 'step': 6729, 'epoch': 1} {'type': 'loss', 'content': 0.2111901193857193, 'timestamp': '2025-09-30 22:20:16.151973', 'step': 6730, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:16.185289', 'step': 6730, 'epoch': 1} {'type': 'loss', 'content': 0.21373611688613892, 'timestamp': '2025-09-30 22:20:16.194370', 'step': 6731, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:16.228264', 'step': 6731, 'epoch': 1} {'type': 'loss', 'content': 0.22680290043354034, 'timestamp': '2025-09-30 22:20:16.253222', 'step': 6732, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:16.310177', 'step': 6732, 'epoch': 1} {'type': 'loss', 'content': 0.10980110615491867, 'timestamp': '2025-09-30 22:20:16.320230', 'step': 6733, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.385621', 'step': 6733, 'epoch': 1} {'type': 'loss', 'content': 0.1736936867237091, 'timestamp': '2025-09-30 22:20:16.394917', 'step': 6734, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.438267', 'step': 6734, 'epoch': 1} {'type': 'loss', 'content': 0.06481169909238815, 'timestamp': '2025-09-30 22:20:16.444204', 'step': 6735, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.492557', 'step': 6735, 'epoch': 1} {'type': 'loss', 'content': 0.15444903075695038, 'timestamp': '2025-09-30 22:20:16.517259', 'step': 6736, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.555259', 'step': 6736, 'epoch': 1} {'type': 'loss', 'content': 0.17121833562850952, 'timestamp': '2025-09-30 22:20:16.564903', 'step': 6737, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.614168', 'step': 6737, 'epoch': 1} {'type': 'loss', 'content': 0.14841516315937042, 'timestamp': '2025-09-30 22:20:16.623460', 'step': 6738, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:16.676306', 'step': 6738, 'epoch': 1} {'type': 'loss', 'content': 0.17154444754123688, 'timestamp': '2025-09-30 22:20:16.678453', 'step': 6739, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:16.709772', 'step': 6739, 'epoch': 1} {'type': 'loss', 'content': 0.13675856590270996, 'timestamp': '2025-09-30 22:20:16.735184', 'step': 6740, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.793383', 'step': 6740, 'epoch': 1} {'type': 'loss', 'content': 0.19159890711307526, 'timestamp': '2025-09-30 22:20:16.799022', 'step': 6741, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:16.853977', 'step': 6741, 'epoch': 1} {'type': 'loss', 'content': 0.1470138430595398, 'timestamp': '2025-09-30 22:20:16.857868', 'step': 6742, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:16.905798', 'step': 6742, 'epoch': 1} {'type': 'loss', 'content': 0.1442173421382904, 'timestamp': '2025-09-30 22:20:16.922240', 'step': 6743, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:16.962349', 'step': 6743, 'epoch': 1} {'type': 'loss', 'content': 0.12597425282001495, 'timestamp': '2025-09-30 22:20:16.997973', 'step': 6744, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:17.059549', 'step': 6744, 'epoch': 1} {'type': 'loss', 'content': 0.12969353795051575, 'timestamp': '2025-09-30 22:20:17.062827', 'step': 6745, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.094984', 'step': 6745, 'epoch': 1} {'type': 'loss', 'content': 0.12922672927379608, 'timestamp': '2025-09-30 22:20:17.106105', 'step': 6746, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:17.142884', 'step': 6746, 'epoch': 1} {'type': 'loss', 'content': 0.1681823879480362, 'timestamp': '2025-09-30 22:20:17.151586', 'step': 6747, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:17.182676', 'step': 6747, 'epoch': 1} {'type': 'loss', 'content': 0.1899537295103073, 'timestamp': '2025-09-30 22:20:17.206707', 'step': 6748, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.238732', 'step': 6748, 'epoch': 1} {'type': 'loss', 'content': 0.1291678249835968, 'timestamp': '2025-09-30 22:20:17.247167', 'step': 6749, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:17.284678', 'step': 6749, 'epoch': 1} {'type': 'loss', 'content': 0.24307161569595337, 'timestamp': '2025-09-30 22:20:17.287398', 'step': 6750, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.321062', 'step': 6750, 'epoch': 1} {'type': 'loss', 'content': 0.09231482446193695, 'timestamp': '2025-09-30 22:20:17.324176', 'step': 6751, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.354498', 'step': 6751, 'epoch': 1} {'type': 'loss', 'content': 0.1842930167913437, 'timestamp': '2025-09-30 22:20:17.378749', 'step': 6752, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:17.410157', 'step': 6752, 'epoch': 1} {'type': 'loss', 'content': 0.1681494414806366, 'timestamp': '2025-09-30 22:20:17.413398', 'step': 6753, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:17.456718', 'step': 6753, 'epoch': 1} {'type': 'loss', 'content': 0.13685664534568787, 'timestamp': '2025-09-30 22:20:17.463716', 'step': 6754, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.500445', 'step': 6754, 'epoch': 1} {'type': 'loss', 'content': 0.1596195548772812, 'timestamp': '2025-09-30 22:20:17.507204', 'step': 6755, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:17.543329', 'step': 6755, 'epoch': 1} {'type': 'loss', 'content': 0.14843375980854034, 'timestamp': '2025-09-30 22:20:17.570700', 'step': 6756, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:17.609259', 'step': 6756, 'epoch': 1} {'type': 'loss', 'content': 0.11946077644824982, 'timestamp': '2025-09-30 22:20:17.613902', 'step': 6757, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:17.652897', 'step': 6757, 'epoch': 1} {'type': 'loss', 'content': 0.04661598429083824, 'timestamp': '2025-09-30 22:20:17.656745', 'step': 6758, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:17.690607', 'step': 6758, 'epoch': 1} {'type': 'loss', 'content': 0.12597143650054932, 'timestamp': '2025-09-30 22:20:17.700028', 'step': 6759, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:17.739736', 'step': 6759, 'epoch': 1} {'type': 'loss', 'content': 0.10802150517702103, 'timestamp': '2025-09-30 22:20:17.766257', 'step': 6760, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.803935', 'step': 6760, 'epoch': 1} {'type': 'loss', 'content': 0.14012131094932556, 'timestamp': '2025-09-30 22:20:17.814739', 'step': 6761, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:17.855705', 'step': 6761, 'epoch': 1} {'type': 'loss', 'content': 0.17079535126686096, 'timestamp': '2025-09-30 22:20:17.858555', 'step': 6762, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:17.889678', 'step': 6762, 'epoch': 1} {'type': 'loss', 'content': 0.12811917066574097, 'timestamp': '2025-09-30 22:20:17.892787', 'step': 6763, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.928750', 'step': 6763, 'epoch': 1} {'type': 'loss', 'content': 0.10646231472492218, 'timestamp': '2025-09-30 22:20:17.960201', 'step': 6764, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:17.991484', 'step': 6764, 'epoch': 1} {'type': 'loss', 'content': 0.13403908908367157, 'timestamp': '2025-09-30 22:20:18.001069', 'step': 6765, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:18.031676', 'step': 6765, 'epoch': 1} {'type': 'loss', 'content': 0.20688262581825256, 'timestamp': '2025-09-30 22:20:18.036856', 'step': 6766, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.078578', 'step': 6766, 'epoch': 1} {'type': 'loss', 'content': 0.16400274634361267, 'timestamp': '2025-09-30 22:20:18.086464', 'step': 6767, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.124606', 'step': 6767, 'epoch': 1} {'type': 'loss', 'content': 0.19502654671669006, 'timestamp': '2025-09-30 22:20:18.153882', 'step': 6768, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.185670', 'step': 6768, 'epoch': 1} {'type': 'loss', 'content': 0.07678733766078949, 'timestamp': '2025-09-30 22:20:18.189875', 'step': 6769, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.220956', 'step': 6769, 'epoch': 1} {'type': 'loss', 'content': 0.22901244461536407, 'timestamp': '2025-09-30 22:20:18.223501', 'step': 6770, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:18.265659', 'step': 6770, 'epoch': 1} {'type': 'loss', 'content': 0.09800662845373154, 'timestamp': '2025-09-30 22:20:18.268653', 'step': 6771, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:18.306411', 'step': 6771, 'epoch': 1} {'type': 'loss', 'content': 0.17519958317279816, 'timestamp': '2025-09-30 22:20:18.340010', 'step': 6772, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:18.373426', 'step': 6772, 'epoch': 1} {'type': 'loss', 'content': 0.09908288717269897, 'timestamp': '2025-09-30 22:20:18.377478', 'step': 6773, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:18.409887', 'step': 6773, 'epoch': 1} {'type': 'loss', 'content': 0.1921156495809555, 'timestamp': '2025-09-30 22:20:18.423666', 'step': 6774, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:18.455688', 'step': 6774, 'epoch': 1} {'type': 'loss', 'content': 0.054121557623147964, 'timestamp': '2025-09-30 22:20:18.464807', 'step': 6775, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:18.508338', 'step': 6775, 'epoch': 1} {'type': 'loss', 'content': 0.09972196817398071, 'timestamp': '2025-09-30 22:20:18.533351', 'step': 6776, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.571185', 'step': 6776, 'epoch': 1} {'type': 'loss', 'content': 0.09471810609102249, 'timestamp': '2025-09-30 22:20:18.582379', 'step': 6777, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.620395', 'step': 6777, 'epoch': 1} {'type': 'loss', 'content': 0.17731447517871857, 'timestamp': '2025-09-30 22:20:18.630365', 'step': 6778, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:18.668034', 'step': 6778, 'epoch': 1} {'type': 'loss', 'content': 0.14493075013160706, 'timestamp': '2025-09-30 22:20:18.670961', 'step': 6779, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.701272', 'step': 6779, 'epoch': 1} {'type': 'loss', 'content': 0.19351616501808167, 'timestamp': '2025-09-30 22:20:18.730312', 'step': 6780, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:18.761704', 'step': 6780, 'epoch': 1} {'type': 'loss', 'content': 0.07759559154510498, 'timestamp': '2025-09-30 22:20:18.770707', 'step': 6781, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:18.807874', 'step': 6781, 'epoch': 1} {'type': 'loss', 'content': 0.09425938129425049, 'timestamp': '2025-09-30 22:20:18.818518', 'step': 6782, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:18.855967', 'step': 6782, 'epoch': 1} {'type': 'loss', 'content': 0.20470651984214783, 'timestamp': '2025-09-30 22:20:18.860456', 'step': 6783, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:18.898908', 'step': 6783, 'epoch': 1} {'type': 'loss', 'content': 0.13391637802124023, 'timestamp': '2025-09-30 22:20:18.931389', 'step': 6784, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:18.970796', 'step': 6784, 'epoch': 1} {'type': 'loss', 'content': 0.1538958102464676, 'timestamp': '2025-09-30 22:20:18.979925', 'step': 6785, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:19.017661', 'step': 6785, 'epoch': 1} {'type': 'loss', 'content': 0.10646697133779526, 'timestamp': '2025-09-30 22:20:19.026517', 'step': 6786, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:19.060892', 'step': 6786, 'epoch': 1} {'type': 'loss', 'content': 0.2515111267566681, 'timestamp': '2025-09-30 22:20:19.068400', 'step': 6787, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.107265', 'step': 6787, 'epoch': 1} {'type': 'loss', 'content': 0.11115722358226776, 'timestamp': '2025-09-30 22:20:19.137978', 'step': 6788, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:19.176665', 'step': 6788, 'epoch': 1} {'type': 'loss', 'content': 0.1319073736667633, 'timestamp': '2025-09-30 22:20:19.179952', 'step': 6789, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.212791', 'step': 6789, 'epoch': 1} {'type': 'loss', 'content': 0.21009594202041626, 'timestamp': '2025-09-30 22:20:19.216363', 'step': 6790, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.252691', 'step': 6790, 'epoch': 1} {'type': 'loss', 'content': 0.13356909155845642, 'timestamp': '2025-09-30 22:20:19.256856', 'step': 6791, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:19.294344', 'step': 6791, 'epoch': 1} {'type': 'loss', 'content': 0.15575402975082397, 'timestamp': '2025-09-30 22:20:19.327354', 'step': 6792, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:19.359456', 'step': 6792, 'epoch': 1} {'type': 'loss', 'content': 0.0991024449467659, 'timestamp': '2025-09-30 22:20:19.369480', 'step': 6793, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:19.409908', 'step': 6793, 'epoch': 1} {'type': 'loss', 'content': 0.180453822016716, 'timestamp': '2025-09-30 22:20:19.419859', 'step': 6794, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:19.456625', 'step': 6794, 'epoch': 1} {'type': 'loss', 'content': 0.08244478702545166, 'timestamp': '2025-09-30 22:20:19.467527', 'step': 6795, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.503858', 'step': 6795, 'epoch': 1} {'type': 'loss', 'content': 0.12372986227273941, 'timestamp': '2025-09-30 22:20:19.536865', 'step': 6796, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:19.569316', 'step': 6796, 'epoch': 1} {'type': 'loss', 'content': 0.20775534212589264, 'timestamp': '2025-09-30 22:20:19.580405', 'step': 6797, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.616796', 'step': 6797, 'epoch': 1} {'type': 'loss', 'content': 0.1896275132894516, 'timestamp': '2025-09-30 22:20:19.625641', 'step': 6798, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.660454', 'step': 6798, 'epoch': 1} {'type': 'loss', 'content': 0.12364847213029861, 'timestamp': '2025-09-30 22:20:19.664506', 'step': 6799, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:19.700018', 'step': 6799, 'epoch': 1} {'type': 'loss', 'content': 0.20912492275238037, 'timestamp': '2025-09-30 22:20:19.725110', 'step': 6800, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.762161', 'step': 6800, 'epoch': 1} {'type': 'loss', 'content': 0.1370323747396469, 'timestamp': '2025-09-30 22:20:19.772296', 'step': 6801, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:19.817138', 'step': 6801, 'epoch': 1} {'type': 'loss', 'content': 0.2014571726322174, 'timestamp': '2025-09-30 22:20:19.825774', 'step': 6802, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.864348', 'step': 6802, 'epoch': 1} {'type': 'loss', 'content': 0.1935512125492096, 'timestamp': '2025-09-30 22:20:19.875264', 'step': 6803, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:19.912992', 'step': 6803, 'epoch': 1} {'type': 'loss', 'content': 0.10494747012853622, 'timestamp': '2025-09-30 22:20:19.938848', 'step': 6804, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:19.975615', 'step': 6804, 'epoch': 1} {'type': 'loss', 'content': 0.20338265597820282, 'timestamp': '2025-09-30 22:20:19.986961', 'step': 6805, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:20.026905', 'step': 6805, 'epoch': 1} {'type': 'loss', 'content': 0.11150859296321869, 'timestamp': '2025-09-30 22:20:20.036723', 'step': 6806, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.074505', 'step': 6806, 'epoch': 1} {'type': 'loss', 'content': 0.07571738958358765, 'timestamp': '2025-09-30 22:20:20.082426', 'step': 6807, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:20.119293', 'step': 6807, 'epoch': 1} {'type': 'loss', 'content': 0.13649681210517883, 'timestamp': '2025-09-30 22:20:20.151299', 'step': 6808, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:20.186951', 'step': 6808, 'epoch': 1} {'type': 'loss', 'content': 0.13180305063724518, 'timestamp': '2025-09-30 22:20:20.189859', 'step': 6809, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:20.222585', 'step': 6809, 'epoch': 1} {'type': 'loss', 'content': 0.18658539652824402, 'timestamp': '2025-09-30 22:20:20.231285', 'step': 6810, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:20.263552', 'step': 6810, 'epoch': 1} {'type': 'loss', 'content': 0.11412861943244934, 'timestamp': '2025-09-30 22:20:20.267820', 'step': 6811, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:20.304570', 'step': 6811, 'epoch': 1} {'type': 'loss', 'content': 0.1713389754295349, 'timestamp': '2025-09-30 22:20:20.335503', 'step': 6812, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.371492', 'step': 6812, 'epoch': 1} {'type': 'loss', 'content': 0.10341059416532516, 'timestamp': '2025-09-30 22:20:20.381239', 'step': 6813, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.417160', 'step': 6813, 'epoch': 1} {'type': 'loss', 'content': 0.11268109083175659, 'timestamp': '2025-09-30 22:20:20.428239', 'step': 6814, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.470198', 'step': 6814, 'epoch': 1} {'type': 'loss', 'content': 0.14801500737667084, 'timestamp': '2025-09-30 22:20:20.481524', 'step': 6815, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.513277', 'step': 6815, 'epoch': 1} {'type': 'loss', 'content': 0.10361465066671371, 'timestamp': '2025-09-30 22:20:20.538232', 'step': 6816, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.582326', 'step': 6816, 'epoch': 1} {'type': 'loss', 'content': 0.14684395492076874, 'timestamp': '2025-09-30 22:20:20.592522', 'step': 6817, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.624618', 'step': 6817, 'epoch': 1} {'type': 'loss', 'content': 0.17930792272090912, 'timestamp': '2025-09-30 22:20:20.636251', 'step': 6818, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:20.674917', 'step': 6818, 'epoch': 1} {'type': 'loss', 'content': 0.20911531150341034, 'timestamp': '2025-09-30 22:20:20.678234', 'step': 6819, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.714897', 'step': 6819, 'epoch': 1} {'type': 'loss', 'content': 0.0962798073887825, 'timestamp': '2025-09-30 22:20:20.746247', 'step': 6820, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:20.778548', 'step': 6820, 'epoch': 1} {'type': 'loss', 'content': 0.15952205657958984, 'timestamp': '2025-09-30 22:20:20.790458', 'step': 6821, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:20.830833', 'step': 6821, 'epoch': 1} {'type': 'loss', 'content': 0.12287880480289459, 'timestamp': '2025-09-30 22:20:20.833935', 'step': 6822, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:20.866463', 'step': 6822, 'epoch': 1} {'type': 'loss', 'content': 0.12519806623458862, 'timestamp': '2025-09-30 22:20:20.879355', 'step': 6823, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:20.912367', 'step': 6823, 'epoch': 1} {'type': 'loss', 'content': 0.16955013573169708, 'timestamp': '2025-09-30 22:20:20.943207', 'step': 6824, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:20.975467', 'step': 6824, 'epoch': 1} {'type': 'loss', 'content': 0.16330347955226898, 'timestamp': '2025-09-30 22:20:20.978641', 'step': 6825, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:21.010907', 'step': 6825, 'epoch': 1} {'type': 'loss', 'content': 0.1973763108253479, 'timestamp': '2025-09-30 22:20:21.019711', 'step': 6826, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:21.057982', 'step': 6826, 'epoch': 1} {'type': 'loss', 'content': 0.20233015716075897, 'timestamp': '2025-09-30 22:20:21.065335', 'step': 6827, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:21.101739', 'step': 6827, 'epoch': 1} {'type': 'loss', 'content': 0.09515563398599625, 'timestamp': '2025-09-30 22:20:21.134410', 'step': 6828, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:21.172808', 'step': 6828, 'epoch': 1} {'type': 'loss', 'content': 0.11175484210252762, 'timestamp': '2025-09-30 22:20:21.181951', 'step': 6829, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:21.214925', 'step': 6829, 'epoch': 1} {'type': 'loss', 'content': 0.10079095512628555, 'timestamp': '2025-09-30 22:20:21.219220', 'step': 6830, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:21.254894', 'step': 6830, 'epoch': 1} {'type': 'loss', 'content': 0.2455013543367386, 'timestamp': '2025-09-30 22:20:21.265391', 'step': 6831, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:21.312267', 'step': 6831, 'epoch': 1} {'type': 'loss', 'content': 0.09758421033620834, 'timestamp': '2025-09-30 22:20:21.342126', 'step': 6832, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:21.378215', 'step': 6832, 'epoch': 1} {'type': 'loss', 'content': 0.184916153550148, 'timestamp': '2025-09-30 22:20:21.382594', 'step': 6833, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:21.414791', 'step': 6833, 'epoch': 1} {'type': 'loss', 'content': 0.16427095234394073, 'timestamp': '2025-09-30 22:20:21.425425', 'step': 6834, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:21.464440', 'step': 6834, 'epoch': 1} {'type': 'loss', 'content': 0.09877981245517731, 'timestamp': '2025-09-30 22:20:21.475681', 'step': 6835, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:21.513167', 'step': 6835, 'epoch': 1} {'type': 'loss', 'content': 0.14285120368003845, 'timestamp': '2025-09-30 22:20:21.544350', 'step': 6836, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:21.576435', 'step': 6836, 'epoch': 1} {'type': 'loss', 'content': 0.19665804505348206, 'timestamp': '2025-09-30 22:20:21.579568', 'step': 6837, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:21.625328', 'step': 6837, 'epoch': 1} {'type': 'loss', 'content': 0.1119651272892952, 'timestamp': '2025-09-30 22:20:21.628962', 'step': 6838, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:21.675928', 'step': 6838, 'epoch': 1} {'type': 'loss', 'content': 0.16162548959255219, 'timestamp': '2025-09-30 22:20:21.686591', 'step': 6839, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:21.726409', 'step': 6839, 'epoch': 1} {'type': 'loss', 'content': 0.13695435225963593, 'timestamp': '2025-09-30 22:20:21.751847', 'step': 6840, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:21.782919', 'step': 6840, 'epoch': 1} {'type': 'loss', 'content': 0.13323213160037994, 'timestamp': '2025-09-30 22:20:21.794148', 'step': 6841, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:21.834010', 'step': 6841, 'epoch': 1} {'type': 'loss', 'content': 0.12705768644809723, 'timestamp': '2025-09-30 22:20:21.837390', 'step': 6842, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:21.868506', 'step': 6842, 'epoch': 1} {'type': 'loss', 'content': 0.13851769268512726, 'timestamp': '2025-09-30 22:20:21.877410', 'step': 6843, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:21.910447', 'step': 6843, 'epoch': 1} {'type': 'loss', 'content': 0.1283094733953476, 'timestamp': '2025-09-30 22:20:21.940366', 'step': 6844, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:21.971175', 'step': 6844, 'epoch': 1} {'type': 'loss', 'content': 0.14520400762557983, 'timestamp': '2025-09-30 22:20:21.981056', 'step': 6845, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:22.017855', 'step': 6845, 'epoch': 1} {'type': 'loss', 'content': 0.10337551683187485, 'timestamp': '2025-09-30 22:20:22.022092', 'step': 6846, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:22.053753', 'step': 6846, 'epoch': 1} {'type': 'loss', 'content': 0.11378420144319534, 'timestamp': '2025-09-30 22:20:22.062770', 'step': 6847, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.096554', 'step': 6847, 'epoch': 1} {'type': 'loss', 'content': 0.1378411501646042, 'timestamp': '2025-09-30 22:20:22.126096', 'step': 6848, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.171730', 'step': 6848, 'epoch': 1} {'type': 'loss', 'content': 0.12108459323644638, 'timestamp': '2025-09-30 22:20:22.180687', 'step': 6849, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:22.218123', 'step': 6849, 'epoch': 1} {'type': 'loss', 'content': 0.08843602240085602, 'timestamp': '2025-09-30 22:20:22.221839', 'step': 6850, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.252238', 'step': 6850, 'epoch': 1} {'type': 'loss', 'content': 0.12312900274991989, 'timestamp': '2025-09-30 22:20:22.255329', 'step': 6851, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.285103', 'step': 6851, 'epoch': 1} {'type': 'loss', 'content': 0.11289486289024353, 'timestamp': '2025-09-30 22:20:22.309600', 'step': 6852, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.350483', 'step': 6852, 'epoch': 1} {'type': 'loss', 'content': 0.12027259916067123, 'timestamp': '2025-09-30 22:20:22.353461', 'step': 6853, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:22.395013', 'step': 6853, 'epoch': 1} {'type': 'loss', 'content': 0.19743697345256805, 'timestamp': '2025-09-30 22:20:22.406037', 'step': 6854, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.444313', 'step': 6854, 'epoch': 1} {'type': 'loss', 'content': 0.19328083097934723, 'timestamp': '2025-09-30 22:20:22.455496', 'step': 6855, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:22.494888', 'step': 6855, 'epoch': 1} {'type': 'loss', 'content': 0.18613967299461365, 'timestamp': '2025-09-30 22:20:22.519737', 'step': 6856, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:22.554989', 'step': 6856, 'epoch': 1} {'type': 'loss', 'content': 0.078695148229599, 'timestamp': '2025-09-30 22:20:22.557972', 'step': 6857, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:22.588166', 'step': 6857, 'epoch': 1} {'type': 'loss', 'content': 0.12447592616081238, 'timestamp': '2025-09-30 22:20:22.591072', 'step': 6858, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:22.621089', 'step': 6858, 'epoch': 1} {'type': 'loss', 'content': 0.11469735205173492, 'timestamp': '2025-09-30 22:20:22.629796', 'step': 6859, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.670326', 'step': 6859, 'epoch': 1} {'type': 'loss', 'content': 0.09689513593912125, 'timestamp': '2025-09-30 22:20:22.694204', 'step': 6860, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:22.727343', 'step': 6860, 'epoch': 1} {'type': 'loss', 'content': 0.15494075417518616, 'timestamp': '2025-09-30 22:20:22.730526', 'step': 6861, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:22.768324', 'step': 6861, 'epoch': 1} {'type': 'loss', 'content': 0.1400042474269867, 'timestamp': '2025-09-30 22:20:22.778019', 'step': 6862, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:22.814164', 'step': 6862, 'epoch': 1} {'type': 'loss', 'content': 0.2659602165222168, 'timestamp': '2025-09-30 22:20:22.816792', 'step': 6863, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:22.846950', 'step': 6863, 'epoch': 1} {'type': 'loss', 'content': 0.1784014105796814, 'timestamp': '2025-09-30 22:20:22.877789', 'step': 6864, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.914844', 'step': 6864, 'epoch': 1} {'type': 'loss', 'content': 0.09615056216716766, 'timestamp': '2025-09-30 22:20:22.928871', 'step': 6865, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.960409', 'step': 6865, 'epoch': 1} {'type': 'loss', 'content': 0.10480961948633194, 'timestamp': '2025-09-30 22:20:22.963667', 'step': 6866, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:22.994606', 'step': 6866, 'epoch': 1} {'type': 'loss', 'content': 0.12812693417072296, 'timestamp': '2025-09-30 22:20:22.998268', 'step': 6867, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.037317', 'step': 6867, 'epoch': 1} {'type': 'loss', 'content': 0.11646901071071625, 'timestamp': '2025-09-30 22:20:23.062017', 'step': 6868, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.099548', 'step': 6868, 'epoch': 1} {'type': 'loss', 'content': 0.21261847019195557, 'timestamp': '2025-09-30 22:20:23.102331', 'step': 6869, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.137784', 'step': 6869, 'epoch': 1} {'type': 'loss', 'content': 0.12376004457473755, 'timestamp': '2025-09-30 22:20:23.146051', 'step': 6870, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:23.184594', 'step': 6870, 'epoch': 1} {'type': 'loss', 'content': 0.16836562752723694, 'timestamp': '2025-09-30 22:20:23.194013', 'step': 6871, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:23.231523', 'step': 6871, 'epoch': 1} {'type': 'loss', 'content': 0.17740872502326965, 'timestamp': '2025-09-30 22:20:23.257447', 'step': 6872, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.290998', 'step': 6872, 'epoch': 1} {'type': 'loss', 'content': 0.19021907448768616, 'timestamp': '2025-09-30 22:20:23.298768', 'step': 6873, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:23.329858', 'step': 6873, 'epoch': 1} {'type': 'loss', 'content': 0.14019286632537842, 'timestamp': '2025-09-30 22:20:23.332641', 'step': 6874, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:23.369832', 'step': 6874, 'epoch': 1} {'type': 'loss', 'content': 0.24510151147842407, 'timestamp': '2025-09-30 22:20:23.372471', 'step': 6875, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:23.408052', 'step': 6875, 'epoch': 1} {'type': 'loss', 'content': 0.12461558729410172, 'timestamp': '2025-09-30 22:20:23.432886', 'step': 6876, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:23.464881', 'step': 6876, 'epoch': 1} {'type': 'loss', 'content': 0.12036871910095215, 'timestamp': '2025-09-30 22:20:23.476885', 'step': 6877, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:23.515168', 'step': 6877, 'epoch': 1} {'type': 'loss', 'content': 0.1782943606376648, 'timestamp': '2025-09-30 22:20:23.527583', 'step': 6878, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.559791', 'step': 6878, 'epoch': 1} {'type': 'loss', 'content': 0.16420170664787292, 'timestamp': '2025-09-30 22:20:23.563549', 'step': 6879, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.593413', 'step': 6879, 'epoch': 1} {'type': 'loss', 'content': 0.11065571010112762, 'timestamp': '2025-09-30 22:20:23.617509', 'step': 6880, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:23.653723', 'step': 6880, 'epoch': 1} {'type': 'loss', 'content': 0.12931664288043976, 'timestamp': '2025-09-30 22:20:23.657514', 'step': 6881, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:23.687697', 'step': 6881, 'epoch': 1} {'type': 'loss', 'content': 0.29517099261283875, 'timestamp': '2025-09-30 22:20:23.696819', 'step': 6882, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:23.729328', 'step': 6882, 'epoch': 1} {'type': 'loss', 'content': 0.1273186355829239, 'timestamp': '2025-09-30 22:20:23.732703', 'step': 6883, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:23.771412', 'step': 6883, 'epoch': 1} {'type': 'loss', 'content': 0.14711488783359528, 'timestamp': '2025-09-30 22:20:23.797673', 'step': 6884, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.841416', 'step': 6884, 'epoch': 1} {'type': 'loss', 'content': 0.13231340050697327, 'timestamp': '2025-09-30 22:20:23.853910', 'step': 6885, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:23.884880', 'step': 6885, 'epoch': 1} {'type': 'loss', 'content': 0.11522176861763, 'timestamp': '2025-09-30 22:20:23.902819', 'step': 6886, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:23.950268', 'step': 6886, 'epoch': 1} {'type': 'loss', 'content': 0.1274770349264145, 'timestamp': '2025-09-30 22:20:23.965453', 'step': 6887, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:24.007577', 'step': 6887, 'epoch': 1} {'type': 'loss', 'content': 0.09644537419080734, 'timestamp': '2025-09-30 22:20:24.042010', 'step': 6888, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.074160', 'step': 6888, 'epoch': 1} {'type': 'loss', 'content': 0.10519327968358994, 'timestamp': '2025-09-30 22:20:24.097106', 'step': 6889, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.144507', 'step': 6889, 'epoch': 1} {'type': 'loss', 'content': 0.10682889819145203, 'timestamp': '2025-09-30 22:20:24.149751', 'step': 6890, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:24.183476', 'step': 6890, 'epoch': 1} {'type': 'loss', 'content': 0.14887404441833496, 'timestamp': '2025-09-30 22:20:24.188167', 'step': 6891, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:24.221320', 'step': 6891, 'epoch': 1} {'type': 'loss', 'content': 0.14278943836688995, 'timestamp': '2025-09-30 22:20:24.246576', 'step': 6892, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.277502', 'step': 6892, 'epoch': 1} {'type': 'loss', 'content': 0.22532616555690765, 'timestamp': '2025-09-30 22:20:24.281330', 'step': 6893, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:24.312532', 'step': 6893, 'epoch': 1} {'type': 'loss', 'content': 0.08736147731542587, 'timestamp': '2025-09-30 22:20:24.320621', 'step': 6894, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:24.358444', 'step': 6894, 'epoch': 1} {'type': 'loss', 'content': 0.15178577601909637, 'timestamp': '2025-09-30 22:20:24.360980', 'step': 6895, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.404036', 'step': 6895, 'epoch': 1} {'type': 'loss', 'content': 0.18760986626148224, 'timestamp': '2025-09-30 22:20:24.430874', 'step': 6896, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:24.463751', 'step': 6896, 'epoch': 1} {'type': 'loss', 'content': 0.12681467831134796, 'timestamp': '2025-09-30 22:20:24.467243', 'step': 6897, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:24.498330', 'step': 6897, 'epoch': 1} {'type': 'loss', 'content': 0.15392878651618958, 'timestamp': '2025-09-30 22:20:24.507420', 'step': 6898, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.542828', 'step': 6898, 'epoch': 1} {'type': 'loss', 'content': 0.2084232121706009, 'timestamp': '2025-09-30 22:20:24.547776', 'step': 6899, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:24.582579', 'step': 6899, 'epoch': 1} {'type': 'loss', 'content': 0.1734045147895813, 'timestamp': '2025-09-30 22:20:24.608014', 'step': 6900, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:24.647882', 'step': 6900, 'epoch': 1} {'type': 'loss', 'content': 0.1415337771177292, 'timestamp': '2025-09-30 22:20:24.651086', 'step': 6901, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.689965', 'step': 6901, 'epoch': 1} {'type': 'loss', 'content': 0.32483479380607605, 'timestamp': '2025-09-30 22:20:24.694336', 'step': 6902, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:24.726324', 'step': 6902, 'epoch': 1} {'type': 'loss', 'content': 0.21364694833755493, 'timestamp': '2025-09-30 22:20:24.732259', 'step': 6903, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.765714', 'step': 6903, 'epoch': 1} {'type': 'loss', 'content': 0.12002164870500565, 'timestamp': '2025-09-30 22:20:24.793951', 'step': 6904, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:24.826902', 'step': 6904, 'epoch': 1} {'type': 'loss', 'content': 0.21226923167705536, 'timestamp': '2025-09-30 22:20:24.830128', 'step': 6905, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:24.864675', 'step': 6905, 'epoch': 1} {'type': 'loss', 'content': 0.19900190830230713, 'timestamp': '2025-09-30 22:20:24.872458', 'step': 6906, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:24.911927', 'step': 6906, 'epoch': 1} {'type': 'loss', 'content': 0.1563330441713333, 'timestamp': '2025-09-30 22:20:24.921687', 'step': 6907, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:24.957596', 'step': 6907, 'epoch': 1} {'type': 'loss', 'content': 0.09528931230306625, 'timestamp': '2025-09-30 22:20:24.989383', 'step': 6908, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.020156', 'step': 6908, 'epoch': 1} {'type': 'loss', 'content': 0.16153906285762787, 'timestamp': '2025-09-30 22:20:25.023996', 'step': 6909, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.073667', 'step': 6909, 'epoch': 1} {'type': 'loss', 'content': 0.17575959861278534, 'timestamp': '2025-09-30 22:20:25.077231', 'step': 6910, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.109079', 'step': 6910, 'epoch': 1} {'type': 'loss', 'content': 0.1301574409008026, 'timestamp': '2025-09-30 22:20:25.113529', 'step': 6911, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.155161', 'step': 6911, 'epoch': 1} {'type': 'loss', 'content': 0.11538707464933395, 'timestamp': '2025-09-30 22:20:25.180169', 'step': 6912, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.219768', 'step': 6912, 'epoch': 1} {'type': 'loss', 'content': 0.1591431200504303, 'timestamp': '2025-09-30 22:20:25.227212', 'step': 6913, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.262104', 'step': 6913, 'epoch': 1} {'type': 'loss', 'content': 0.14940963685512543, 'timestamp': '2025-09-30 22:20:25.267995', 'step': 6914, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.314838', 'step': 6914, 'epoch': 1} {'type': 'loss', 'content': 0.18279147148132324, 'timestamp': '2025-09-30 22:20:25.325933', 'step': 6915, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.357159', 'step': 6915, 'epoch': 1} {'type': 'loss', 'content': 0.111714206635952, 'timestamp': '2025-09-30 22:20:25.389894', 'step': 6916, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:25.420856', 'step': 6916, 'epoch': 1} {'type': 'loss', 'content': 0.10106496512889862, 'timestamp': '2025-09-30 22:20:25.430924', 'step': 6917, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:25.462334', 'step': 6917, 'epoch': 1} {'type': 'loss', 'content': 0.07845775783061981, 'timestamp': '2025-09-30 22:20:25.465667', 'step': 6918, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:25.503902', 'step': 6918, 'epoch': 1} {'type': 'loss', 'content': 0.1300162672996521, 'timestamp': '2025-09-30 22:20:25.512290', 'step': 6919, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.543316', 'step': 6919, 'epoch': 1} {'type': 'loss', 'content': 0.08511573076248169, 'timestamp': '2025-09-30 22:20:25.573015', 'step': 6920, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:25.603817', 'step': 6920, 'epoch': 1} {'type': 'loss', 'content': 0.09349973499774933, 'timestamp': '2025-09-30 22:20:25.606868', 'step': 6921, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:25.642492', 'step': 6921, 'epoch': 1} {'type': 'loss', 'content': 0.1197185069322586, 'timestamp': '2025-09-30 22:20:25.650232', 'step': 6922, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:25.686776', 'step': 6922, 'epoch': 1} {'type': 'loss', 'content': 0.15670308470726013, 'timestamp': '2025-09-30 22:20:25.692447', 'step': 6923, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.726903', 'step': 6923, 'epoch': 1} {'type': 'loss', 'content': 0.11527416110038757, 'timestamp': '2025-09-30 22:20:25.751337', 'step': 6924, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.794563', 'step': 6924, 'epoch': 1} {'type': 'loss', 'content': 0.13007546961307526, 'timestamp': '2025-09-30 22:20:25.798375', 'step': 6925, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:25.829540', 'step': 6925, 'epoch': 1} {'type': 'loss', 'content': 0.15325181186199188, 'timestamp': '2025-09-30 22:20:25.833770', 'step': 6926, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:25.872330', 'step': 6926, 'epoch': 1} {'type': 'loss', 'content': 0.17647568881511688, 'timestamp': '2025-09-30 22:20:25.886225', 'step': 6927, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.919021', 'step': 6927, 'epoch': 1} {'type': 'loss', 'content': 0.17504151165485382, 'timestamp': '2025-09-30 22:20:25.946856', 'step': 6928, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:25.982992', 'step': 6928, 'epoch': 1} {'type': 'loss', 'content': 0.1712007075548172, 'timestamp': '2025-09-30 22:20:25.992603', 'step': 6929, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:26.031355', 'step': 6929, 'epoch': 1} {'type': 'loss', 'content': 0.1895780861377716, 'timestamp': '2025-09-30 22:20:26.034689', 'step': 6930, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:26.065818', 'step': 6930, 'epoch': 1} {'type': 'loss', 'content': 0.11125688254833221, 'timestamp': '2025-09-30 22:20:26.073631', 'step': 6931, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:26.111722', 'step': 6931, 'epoch': 1} {'type': 'loss', 'content': 0.0595255009829998, 'timestamp': '2025-09-30 22:20:26.140636', 'step': 6932, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:26.175622', 'step': 6932, 'epoch': 1} {'type': 'loss', 'content': 0.08771105855703354, 'timestamp': '2025-09-30 22:20:26.185493', 'step': 6933, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:26.216408', 'step': 6933, 'epoch': 1} {'type': 'loss', 'content': 0.1353534609079361, 'timestamp': '2025-09-30 22:20:26.225554', 'step': 6934, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:26.263229', 'step': 6934, 'epoch': 1} {'type': 'loss', 'content': 0.10871029645204544, 'timestamp': '2025-09-30 22:20:26.269917', 'step': 6935, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:26.306173', 'step': 6935, 'epoch': 1} {'type': 'loss', 'content': 0.12756198644638062, 'timestamp': '2025-09-30 22:20:26.337332', 'step': 6936, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.370040', 'step': 6936, 'epoch': 1} {'type': 'loss', 'content': 0.11688686907291412, 'timestamp': '2025-09-30 22:20:26.379608', 'step': 6937, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:26.418478', 'step': 6937, 'epoch': 1} {'type': 'loss', 'content': 0.09206748753786087, 'timestamp': '2025-09-30 22:20:26.429165', 'step': 6938, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:20:26.462915', 'step': 6938, 'epoch': 1} {'type': 'loss', 'content': 0.1474301517009735, 'timestamp': '2025-09-30 22:20:26.467471', 'step': 6939, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.504179', 'step': 6939, 'epoch': 1} {'type': 'loss', 'content': 0.16425028443336487, 'timestamp': '2025-09-30 22:20:26.528979', 'step': 6940, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.561043', 'step': 6940, 'epoch': 1} {'type': 'loss', 'content': 0.17422185838222504, 'timestamp': '2025-09-30 22:20:26.569777', 'step': 6941, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.605861', 'step': 6941, 'epoch': 1} {'type': 'loss', 'content': 0.2256983071565628, 'timestamp': '2025-09-30 22:20:26.609115', 'step': 6942, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.640193', 'step': 6942, 'epoch': 1} {'type': 'loss', 'content': 0.14617115259170532, 'timestamp': '2025-09-30 22:20:26.647013', 'step': 6943, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:26.682278', 'step': 6943, 'epoch': 1} {'type': 'loss', 'content': 0.11595666408538818, 'timestamp': '2025-09-30 22:20:26.707300', 'step': 6944, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.742914', 'step': 6944, 'epoch': 1} {'type': 'loss', 'content': 0.16610610485076904, 'timestamp': '2025-09-30 22:20:26.747177', 'step': 6945, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.784804', 'step': 6945, 'epoch': 1} {'type': 'loss', 'content': 0.12087138742208481, 'timestamp': '2025-09-30 22:20:26.788492', 'step': 6946, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.823516', 'step': 6946, 'epoch': 1} {'type': 'loss', 'content': 0.16533301770687103, 'timestamp': '2025-09-30 22:20:26.826446', 'step': 6947, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:26.864965', 'step': 6947, 'epoch': 1} {'type': 'loss', 'content': 0.09292902052402496, 'timestamp': '2025-09-30 22:20:26.889823', 'step': 6948, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:26.928693', 'step': 6948, 'epoch': 1} {'type': 'loss', 'content': 0.14518320560455322, 'timestamp': '2025-09-30 22:20:26.932052', 'step': 6949, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:26.963105', 'step': 6949, 'epoch': 1} {'type': 'loss', 'content': 0.21350312232971191, 'timestamp': '2025-09-30 22:20:26.973487', 'step': 6950, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:27.003674', 'step': 6950, 'epoch': 1} {'type': 'loss', 'content': 0.1485159993171692, 'timestamp': '2025-09-30 22:20:27.006629', 'step': 6951, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.044085', 'step': 6951, 'epoch': 1} {'type': 'loss', 'content': 0.11600223928689957, 'timestamp': '2025-09-30 22:20:27.068917', 'step': 6952, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:27.106783', 'step': 6952, 'epoch': 1} {'type': 'loss', 'content': 0.16726520657539368, 'timestamp': '2025-09-30 22:20:27.117336', 'step': 6953, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.157141', 'step': 6953, 'epoch': 1} {'type': 'loss', 'content': 0.11263171583414078, 'timestamp': '2025-09-30 22:20:27.166998', 'step': 6954, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:27.204588', 'step': 6954, 'epoch': 1} {'type': 'loss', 'content': 0.22292140126228333, 'timestamp': '2025-09-30 22:20:27.213122', 'step': 6955, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.243994', 'step': 6955, 'epoch': 1} {'type': 'loss', 'content': 0.11810536682605743, 'timestamp': '2025-09-30 22:20:27.270049', 'step': 6956, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:27.299452', 'step': 6956, 'epoch': 1} {'type': 'loss', 'content': 0.2584218382835388, 'timestamp': '2025-09-30 22:20:27.309835', 'step': 6957, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.342272', 'step': 6957, 'epoch': 1} {'type': 'loss', 'content': 0.13356609642505646, 'timestamp': '2025-09-30 22:20:27.344712', 'step': 6958, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:27.376187', 'step': 6958, 'epoch': 1} {'type': 'loss', 'content': 0.19678427278995514, 'timestamp': '2025-09-30 22:20:27.379549', 'step': 6959, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:27.411613', 'step': 6959, 'epoch': 1} {'type': 'loss', 'content': 0.1778622418642044, 'timestamp': '2025-09-30 22:20:27.436188', 'step': 6960, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.473159', 'step': 6960, 'epoch': 1} {'type': 'loss', 'content': 0.15146595239639282, 'timestamp': '2025-09-30 22:20:27.475555', 'step': 6961, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:27.506314', 'step': 6961, 'epoch': 1} {'type': 'loss', 'content': 0.17784082889556885, 'timestamp': '2025-09-30 22:20:27.509363', 'step': 6962, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.540904', 'step': 6962, 'epoch': 1} {'type': 'loss', 'content': 0.12929566204547882, 'timestamp': '2025-09-30 22:20:27.544605', 'step': 6963, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.575776', 'step': 6963, 'epoch': 1} {'type': 'loss', 'content': 0.17370109260082245, 'timestamp': '2025-09-30 22:20:27.601116', 'step': 6964, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:27.633269', 'step': 6964, 'epoch': 1} {'type': 'loss', 'content': 0.14265824854373932, 'timestamp': '2025-09-30 22:20:27.636334', 'step': 6965, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.666935', 'step': 6965, 'epoch': 1} {'type': 'loss', 'content': 0.11732613295316696, 'timestamp': '2025-09-30 22:20:27.670803', 'step': 6966, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:27.702054', 'step': 6966, 'epoch': 1} {'type': 'loss', 'content': 0.11866509914398193, 'timestamp': '2025-09-30 22:20:27.707028', 'step': 6967, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:27.739165', 'step': 6967, 'epoch': 1} {'type': 'loss', 'content': 0.17803101241588593, 'timestamp': '2025-09-30 22:20:27.763701', 'step': 6968, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:27.796103', 'step': 6968, 'epoch': 1} {'type': 'loss', 'content': 0.06397032737731934, 'timestamp': '2025-09-30 22:20:27.799332', 'step': 6969, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.830567', 'step': 6969, 'epoch': 1} {'type': 'loss', 'content': 0.13616502285003662, 'timestamp': '2025-09-30 22:20:27.833707', 'step': 6970, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:27.865567', 'step': 6970, 'epoch': 1} {'type': 'loss', 'content': 0.12463866174221039, 'timestamp': '2025-09-30 22:20:27.868018', 'step': 6971, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:27.899055', 'step': 6971, 'epoch': 1} {'type': 'loss', 'content': 0.13690128922462463, 'timestamp': '2025-09-30 22:20:27.923054', 'step': 6972, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:27.954358', 'step': 6972, 'epoch': 1} {'type': 'loss', 'content': 0.31303703784942627, 'timestamp': '2025-09-30 22:20:27.957450', 'step': 6973, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:27.988210', 'step': 6973, 'epoch': 1} {'type': 'loss', 'content': 0.16184072196483612, 'timestamp': '2025-09-30 22:20:27.992855', 'step': 6974, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:28.028450', 'step': 6974, 'epoch': 1} {'type': 'loss', 'content': 0.17523299157619476, 'timestamp': '2025-09-30 22:20:28.032541', 'step': 6975, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.063806', 'step': 6975, 'epoch': 1} {'type': 'loss', 'content': 0.1279531568288803, 'timestamp': '2025-09-30 22:20:28.089594', 'step': 6976, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.120408', 'step': 6976, 'epoch': 1} {'type': 'loss', 'content': 0.20962446928024292, 'timestamp': '2025-09-30 22:20:28.125254', 'step': 6977, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.157883', 'step': 6977, 'epoch': 1} {'type': 'loss', 'content': 0.2665541172027588, 'timestamp': '2025-09-30 22:20:28.163444', 'step': 6978, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.196464', 'step': 6978, 'epoch': 1} {'type': 'loss', 'content': 0.1120101660490036, 'timestamp': '2025-09-30 22:20:28.199793', 'step': 6979, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.231594', 'step': 6979, 'epoch': 1} {'type': 'loss', 'content': 0.1604461967945099, 'timestamp': '2025-09-30 22:20:28.261094', 'step': 6980, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.292709', 'step': 6980, 'epoch': 1} {'type': 'loss', 'content': 0.1368369311094284, 'timestamp': '2025-09-30 22:20:28.296189', 'step': 6981, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.329007', 'step': 6981, 'epoch': 1} {'type': 'loss', 'content': 0.07822607457637787, 'timestamp': '2025-09-30 22:20:28.333838', 'step': 6982, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.369934', 'step': 6982, 'epoch': 1} {'type': 'loss', 'content': 0.1858397275209427, 'timestamp': '2025-09-30 22:20:28.374232', 'step': 6983, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.406829', 'step': 6983, 'epoch': 1} {'type': 'loss', 'content': 0.09268350899219513, 'timestamp': '2025-09-30 22:20:28.432177', 'step': 6984, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.464638', 'step': 6984, 'epoch': 1} {'type': 'loss', 'content': 0.0968797579407692, 'timestamp': '2025-09-30 22:20:28.482775', 'step': 6985, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:28.526615', 'step': 6985, 'epoch': 1} {'type': 'loss', 'content': 0.24534066021442413, 'timestamp': '2025-09-30 22:20:28.529146', 'step': 6986, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.564008', 'step': 6986, 'epoch': 1} {'type': 'loss', 'content': 0.20645412802696228, 'timestamp': '2025-09-30 22:20:28.570018', 'step': 6987, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.603522', 'step': 6987, 'epoch': 1} {'type': 'loss', 'content': 0.11365397274494171, 'timestamp': '2025-09-30 22:20:28.627915', 'step': 6988, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:28.660521', 'step': 6988, 'epoch': 1} {'type': 'loss', 'content': 0.10009853541851044, 'timestamp': '2025-09-30 22:20:28.663421', 'step': 6989, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:28.694712', 'step': 6989, 'epoch': 1} {'type': 'loss', 'content': 0.19156034290790558, 'timestamp': '2025-09-30 22:20:28.698436', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:20:36.576586', 'step': 6990, 'epoch': 1} {'type': 'pplx', 'content': 8450.027408524715, 'timestamp': '2025-09-30 22:20:36.581204', 'step': 6990, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:36.628157', 'step': 6990, 'epoch': 1} {'type': 'loss', 'content': 0.17422638833522797, 'timestamp': '2025-09-30 22:20:36.631268', 'step': 6991, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:36.667718', 'step': 6991, 'epoch': 1} {'type': 'loss', 'content': 0.1552240401506424, 'timestamp': '2025-09-30 22:20:36.691878', 'step': 6992, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:36.722925', 'step': 6992, 'epoch': 1} {'type': 'loss', 'content': 0.18059010803699493, 'timestamp': '2025-09-30 22:20:36.726613', 'step': 6993, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:36.757628', 'step': 6993, 'epoch': 1} {'type': 'loss', 'content': 0.09972290694713593, 'timestamp': '2025-09-30 22:20:36.763481', 'step': 6994, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:36.799904', 'step': 6994, 'epoch': 1} {'type': 'loss', 'content': 0.21886244416236877, 'timestamp': '2025-09-30 22:20:36.810858', 'step': 6995, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:36.847762', 'step': 6995, 'epoch': 1} {'type': 'loss', 'content': 0.07407736778259277, 'timestamp': '2025-09-30 22:20:36.887188', 'step': 6996, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:36.925897', 'step': 6996, 'epoch': 1} {'type': 'loss', 'content': 0.13381342589855194, 'timestamp': '2025-09-30 22:20:36.929893', 'step': 6997, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:36.979438', 'step': 6997, 'epoch': 1} {'type': 'loss', 'content': 0.21058769524097443, 'timestamp': '2025-09-30 22:20:36.986185', 'step': 6998, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:37.027554', 'step': 6998, 'epoch': 1} {'type': 'loss', 'content': 0.10248175263404846, 'timestamp': '2025-09-30 22:20:37.031172', 'step': 6999, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:37.062590', 'step': 6999, 'epoch': 1} {'type': 'loss', 'content': 0.23250965774059296, 'timestamp': '2025-09-30 22:20:37.086916', 'step': 7000, 'epoch': 1} {'type': 'info', 'content': 'Checkpoint saved at step 7000', 'timestamp': '2025-09-30 22:20:42.574623', 'step': 7000, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:42.618066', 'step': 7000, 'epoch': 1} {'type': 'loss', 'content': 0.058958712965250015, 'timestamp': '2025-09-30 22:20:42.620680', 'step': 7001, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:42.651567', 'step': 7001, 'epoch': 1} {'type': 'loss', 'content': 0.15812364220619202, 'timestamp': '2025-09-30 22:20:42.653729', 'step': 7002, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:42.685314', 'step': 7002, 'epoch': 1} {'type': 'loss', 'content': 0.24211984872817993, 'timestamp': '2025-09-30 22:20:42.687907', 'step': 7003, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:42.719248', 'step': 7003, 'epoch': 1} {'type': 'loss', 'content': 0.153473362326622, 'timestamp': '2025-09-30 22:20:42.743150', 'step': 7004, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:42.772909', 'step': 7004, 'epoch': 1} {'type': 'loss', 'content': 0.14667798578739166, 'timestamp': '2025-09-30 22:20:42.775216', 'step': 7005, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:42.818710', 'step': 7005, 'epoch': 1} {'type': 'loss', 'content': 0.130039244890213, 'timestamp': '2025-09-30 22:20:42.820716', 'step': 7006, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:42.850706', 'step': 7006, 'epoch': 1} {'type': 'loss', 'content': 0.14754274487495422, 'timestamp': '2025-09-30 22:20:42.853069', 'step': 7007, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:42.883010', 'step': 7007, 'epoch': 1} {'type': 'loss', 'content': 0.10984225571155548, 'timestamp': '2025-09-30 22:20:42.906962', 'step': 7008, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:42.937269', 'step': 7008, 'epoch': 1} {'type': 'loss', 'content': 0.15676279366016388, 'timestamp': '2025-09-30 22:20:42.939475', 'step': 7009, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:42.970053', 'step': 7009, 'epoch': 1} {'type': 'loss', 'content': 0.10630058497190475, 'timestamp': '2025-09-30 22:20:42.972499', 'step': 7010, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.003188', 'step': 7010, 'epoch': 1} {'type': 'loss', 'content': 0.11357149481773376, 'timestamp': '2025-09-30 22:20:43.005531', 'step': 7011, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.041115', 'step': 7011, 'epoch': 1} {'type': 'loss', 'content': 0.16637413203716278, 'timestamp': '2025-09-30 22:20:43.065060', 'step': 7012, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:43.095954', 'step': 7012, 'epoch': 1} {'type': 'loss', 'content': 0.23912522196769714, 'timestamp': '2025-09-30 22:20:43.098742', 'step': 7013, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.129348', 'step': 7013, 'epoch': 1} {'type': 'loss', 'content': 0.21255557239055634, 'timestamp': '2025-09-30 22:20:43.131555', 'step': 7014, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:43.168096', 'step': 7014, 'epoch': 1} {'type': 'loss', 'content': 0.16013219952583313, 'timestamp': '2025-09-30 22:20:43.170869', 'step': 7015, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.202392', 'step': 7015, 'epoch': 1} {'type': 'loss', 'content': 0.22033846378326416, 'timestamp': '2025-09-30 22:20:43.227495', 'step': 7016, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.260171', 'step': 7016, 'epoch': 1} {'type': 'loss', 'content': 0.21827228367328644, 'timestamp': '2025-09-30 22:20:43.264117', 'step': 7017, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.295293', 'step': 7017, 'epoch': 1} {'type': 'loss', 'content': 0.20850053429603577, 'timestamp': '2025-09-30 22:20:43.299584', 'step': 7018, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.332478', 'step': 7018, 'epoch': 1} {'type': 'loss', 'content': 0.19007837772369385, 'timestamp': '2025-09-30 22:20:43.351923', 'step': 7019, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.400095', 'step': 7019, 'epoch': 1} {'type': 'loss', 'content': 0.103604756295681, 'timestamp': '2025-09-30 22:20:43.440512', 'step': 7020, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:43.487580', 'step': 7020, 'epoch': 1} {'type': 'loss', 'content': 0.17025786638259888, 'timestamp': '2025-09-30 22:20:43.505858', 'step': 7021, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:43.537753', 'step': 7021, 'epoch': 1} {'type': 'loss', 'content': 0.13740046322345734, 'timestamp': '2025-09-30 22:20:43.548128', 'step': 7022, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:43.579224', 'step': 7022, 'epoch': 1} {'type': 'loss', 'content': 0.13652952015399933, 'timestamp': '2025-09-30 22:20:43.581631', 'step': 7023, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.611877', 'step': 7023, 'epoch': 1} {'type': 'loss', 'content': 0.08994916081428528, 'timestamp': '2025-09-30 22:20:43.635773', 'step': 7024, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.666036', 'step': 7024, 'epoch': 1} {'type': 'loss', 'content': 0.1934288740158081, 'timestamp': '2025-09-30 22:20:43.669576', 'step': 7025, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.700219', 'step': 7025, 'epoch': 1} {'type': 'loss', 'content': 0.2679905295372009, 'timestamp': '2025-09-30 22:20:43.702445', 'step': 7026, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.733730', 'step': 7026, 'epoch': 1} {'type': 'loss', 'content': 0.17030125856399536, 'timestamp': '2025-09-30 22:20:43.736519', 'step': 7027, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:43.766958', 'step': 7027, 'epoch': 1} {'type': 'loss', 'content': 0.14387688040733337, 'timestamp': '2025-09-30 22:20:43.791331', 'step': 7028, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:43.821685', 'step': 7028, 'epoch': 1} {'type': 'loss', 'content': 0.10702955722808838, 'timestamp': '2025-09-30 22:20:43.824150', 'step': 7029, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.854174', 'step': 7029, 'epoch': 1} {'type': 'loss', 'content': 0.15368163585662842, 'timestamp': '2025-09-30 22:20:43.857270', 'step': 7030, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.890626', 'step': 7030, 'epoch': 1} {'type': 'loss', 'content': 0.06456245481967926, 'timestamp': '2025-09-30 22:20:43.893352', 'step': 7031, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:43.924002', 'step': 7031, 'epoch': 1} {'type': 'loss', 'content': 0.09222544729709625, 'timestamp': '2025-09-30 22:20:43.947998', 'step': 7032, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:43.979820', 'step': 7032, 'epoch': 1} {'type': 'loss', 'content': 0.13297924399375916, 'timestamp': '2025-09-30 22:20:43.982542', 'step': 7033, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.013394', 'step': 7033, 'epoch': 1} {'type': 'loss', 'content': 0.1523326337337494, 'timestamp': '2025-09-30 22:20:44.015756', 'step': 7034, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:44.046854', 'step': 7034, 'epoch': 1} {'type': 'loss', 'content': 0.09556729346513748, 'timestamp': '2025-09-30 22:20:44.048986', 'step': 7035, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:44.078843', 'step': 7035, 'epoch': 1} {'type': 'loss', 'content': 0.10504192858934402, 'timestamp': '2025-09-30 22:20:44.103091', 'step': 7036, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:44.133467', 'step': 7036, 'epoch': 1} {'type': 'loss', 'content': 0.13906575739383698, 'timestamp': '2025-09-30 22:20:44.137638', 'step': 7037, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.167594', 'step': 7037, 'epoch': 1} {'type': 'loss', 'content': 0.1236720085144043, 'timestamp': '2025-09-30 22:20:44.170060', 'step': 7038, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:44.201041', 'step': 7038, 'epoch': 1} {'type': 'loss', 'content': 0.13857726752758026, 'timestamp': '2025-09-30 22:20:44.206015', 'step': 7039, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.238696', 'step': 7039, 'epoch': 1} {'type': 'loss', 'content': 0.17668142914772034, 'timestamp': '2025-09-30 22:20:44.263655', 'step': 7040, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.295552', 'step': 7040, 'epoch': 1} {'type': 'loss', 'content': 0.10429719090461731, 'timestamp': '2025-09-30 22:20:44.299534', 'step': 7041, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.331586', 'step': 7041, 'epoch': 1} {'type': 'loss', 'content': 0.22759583592414856, 'timestamp': '2025-09-30 22:20:44.334961', 'step': 7042, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.367504', 'step': 7042, 'epoch': 1} {'type': 'loss', 'content': 0.14223229885101318, 'timestamp': '2025-09-30 22:20:44.369709', 'step': 7043, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:44.399654', 'step': 7043, 'epoch': 1} {'type': 'loss', 'content': 0.14861825108528137, 'timestamp': '2025-09-30 22:20:44.424814', 'step': 7044, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:44.455108', 'step': 7044, 'epoch': 1} {'type': 'loss', 'content': 0.11776663362979889, 'timestamp': '2025-09-30 22:20:44.457798', 'step': 7045, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.491908', 'step': 7045, 'epoch': 1} {'type': 'loss', 'content': 0.10097941011190414, 'timestamp': '2025-09-30 22:20:44.494465', 'step': 7046, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.524724', 'step': 7046, 'epoch': 1} {'type': 'loss', 'content': 0.18493345379829407, 'timestamp': '2025-09-30 22:20:44.527887', 'step': 7047, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.559446', 'step': 7047, 'epoch': 1} {'type': 'loss', 'content': 0.19008904695510864, 'timestamp': '2025-09-30 22:20:44.583152', 'step': 7048, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.613018', 'step': 7048, 'epoch': 1} {'type': 'loss', 'content': 0.129231795668602, 'timestamp': '2025-09-30 22:20:44.616027', 'step': 7049, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.646570', 'step': 7049, 'epoch': 1} {'type': 'loss', 'content': 0.12222667038440704, 'timestamp': '2025-09-30 22:20:44.650052', 'step': 7050, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.679807', 'step': 7050, 'epoch': 1} {'type': 'loss', 'content': 0.2204275131225586, 'timestamp': '2025-09-30 22:20:44.682137', 'step': 7051, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:44.712839', 'step': 7051, 'epoch': 1} {'type': 'loss', 'content': 0.2046661376953125, 'timestamp': '2025-09-30 22:20:44.738512', 'step': 7052, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.770741', 'step': 7052, 'epoch': 1} {'type': 'loss', 'content': 0.14892707765102386, 'timestamp': '2025-09-30 22:20:44.772925', 'step': 7053, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:44.803252', 'step': 7053, 'epoch': 1} {'type': 'loss', 'content': 0.1610136181116104, 'timestamp': '2025-09-30 22:20:44.805671', 'step': 7054, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:44.835760', 'step': 7054, 'epoch': 1} {'type': 'loss', 'content': 0.1586248278617859, 'timestamp': '2025-09-30 22:20:44.838160', 'step': 7055, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:44.872735', 'step': 7055, 'epoch': 1} {'type': 'loss', 'content': 0.08331166952848434, 'timestamp': '2025-09-30 22:20:44.896619', 'step': 7056, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:44.926918', 'step': 7056, 'epoch': 1} {'type': 'loss', 'content': 0.1887901872396469, 'timestamp': '2025-09-30 22:20:44.929183', 'step': 7057, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:44.959080', 'step': 7057, 'epoch': 1} {'type': 'loss', 'content': 0.1994640827178955, 'timestamp': '2025-09-30 22:20:44.961883', 'step': 7058, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:44.993751', 'step': 7058, 'epoch': 1} {'type': 'loss', 'content': 0.26501619815826416, 'timestamp': '2025-09-30 22:20:44.996632', 'step': 7059, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:45.027005', 'step': 7059, 'epoch': 1} {'type': 'loss', 'content': 0.29978084564208984, 'timestamp': '2025-09-30 22:20:45.050874', 'step': 7060, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:45.086608', 'step': 7060, 'epoch': 1} {'type': 'loss', 'content': 0.13106176257133484, 'timestamp': '2025-09-30 22:20:45.089244', 'step': 7061, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:45.120575', 'step': 7061, 'epoch': 1} {'type': 'loss', 'content': 0.2815638482570648, 'timestamp': '2025-09-30 22:20:45.122997', 'step': 7062, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.152996', 'step': 7062, 'epoch': 1} {'type': 'loss', 'content': 0.224348783493042, 'timestamp': '2025-09-30 22:20:45.155407', 'step': 7063, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:45.185686', 'step': 7063, 'epoch': 1} {'type': 'loss', 'content': 0.2173374593257904, 'timestamp': '2025-09-30 22:20:45.209571', 'step': 7064, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.244023', 'step': 7064, 'epoch': 1} {'type': 'loss', 'content': 0.1523284614086151, 'timestamp': '2025-09-30 22:20:45.246237', 'step': 7065, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.278932', 'step': 7065, 'epoch': 1} {'type': 'loss', 'content': 0.20130744576454163, 'timestamp': '2025-09-30 22:20:45.284524', 'step': 7066, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:45.315925', 'step': 7066, 'epoch': 1} {'type': 'loss', 'content': 0.18733811378479004, 'timestamp': '2025-09-30 22:20:45.320232', 'step': 7067, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.350710', 'step': 7067, 'epoch': 1} {'type': 'loss', 'content': 0.1025581955909729, 'timestamp': '2025-09-30 22:20:45.375084', 'step': 7068, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.406069', 'step': 7068, 'epoch': 1} {'type': 'loss', 'content': 0.1365411877632141, 'timestamp': '2025-09-30 22:20:45.408689', 'step': 7069, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.438665', 'step': 7069, 'epoch': 1} {'type': 'loss', 'content': 0.11624066531658173, 'timestamp': '2025-09-30 22:20:45.441152', 'step': 7070, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.482393', 'step': 7070, 'epoch': 1} {'type': 'loss', 'content': 0.1883266717195511, 'timestamp': '2025-09-30 22:20:45.484631', 'step': 7071, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.519162', 'step': 7071, 'epoch': 1} {'type': 'loss', 'content': 0.16475220024585724, 'timestamp': '2025-09-30 22:20:45.542921', 'step': 7072, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.573275', 'step': 7072, 'epoch': 1} {'type': 'loss', 'content': 0.14206629991531372, 'timestamp': '2025-09-30 22:20:45.575711', 'step': 7073, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.610158', 'step': 7073, 'epoch': 1} {'type': 'loss', 'content': 0.1466435045003891, 'timestamp': '2025-09-30 22:20:45.612952', 'step': 7074, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.646863', 'step': 7074, 'epoch': 1} {'type': 'loss', 'content': 0.08662496507167816, 'timestamp': '2025-09-30 22:20:45.649043', 'step': 7075, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.679262', 'step': 7075, 'epoch': 1} {'type': 'loss', 'content': 0.09026961773633957, 'timestamp': '2025-09-30 22:20:45.703033', 'step': 7076, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.733363', 'step': 7076, 'epoch': 1} {'type': 'loss', 'content': 0.09986633062362671, 'timestamp': '2025-09-30 22:20:45.736000', 'step': 7077, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:45.765926', 'step': 7077, 'epoch': 1} {'type': 'loss', 'content': 0.1599377542734146, 'timestamp': '2025-09-30 22:20:45.772727', 'step': 7078, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.802869', 'step': 7078, 'epoch': 1} {'type': 'loss', 'content': 0.1689445823431015, 'timestamp': '2025-09-30 22:20:45.807210', 'step': 7079, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.842003', 'step': 7079, 'epoch': 1} {'type': 'loss', 'content': 0.10103315114974976, 'timestamp': '2025-09-30 22:20:45.865814', 'step': 7080, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.895685', 'step': 7080, 'epoch': 1} {'type': 'loss', 'content': 0.14340023696422577, 'timestamp': '2025-09-30 22:20:45.898223', 'step': 7081, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.928647', 'step': 7081, 'epoch': 1} {'type': 'loss', 'content': 0.15689364075660706, 'timestamp': '2025-09-30 22:20:45.931118', 'step': 7082, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:45.963151', 'step': 7082, 'epoch': 1} {'type': 'loss', 'content': 0.11374270170927048, 'timestamp': '2025-09-30 22:20:45.965925', 'step': 7083, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:45.997007', 'step': 7083, 'epoch': 1} {'type': 'loss', 'content': 0.1666984260082245, 'timestamp': '2025-09-30 22:20:46.020648', 'step': 7084, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:46.050798', 'step': 7084, 'epoch': 1} {'type': 'loss', 'content': 0.19116009771823883, 'timestamp': '2025-09-30 22:20:46.053110', 'step': 7085, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.084476', 'step': 7085, 'epoch': 1} {'type': 'loss', 'content': 0.15051522850990295, 'timestamp': '2025-09-30 22:20:46.087104', 'step': 7086, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.123003', 'step': 7086, 'epoch': 1} {'type': 'loss', 'content': 0.1895187944173813, 'timestamp': '2025-09-30 22:20:46.126060', 'step': 7087, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.161618', 'step': 7087, 'epoch': 1} {'type': 'loss', 'content': 0.12628193199634552, 'timestamp': '2025-09-30 22:20:46.185652', 'step': 7088, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.215946', 'step': 7088, 'epoch': 1} {'type': 'loss', 'content': 0.0947992205619812, 'timestamp': '2025-09-30 22:20:46.219282', 'step': 7089, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:46.251457', 'step': 7089, 'epoch': 1} {'type': 'loss', 'content': 0.04926055297255516, 'timestamp': '2025-09-30 22:20:46.254519', 'step': 7090, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.288941', 'step': 7090, 'epoch': 1} {'type': 'loss', 'content': 0.15115965902805328, 'timestamp': '2025-09-30 22:20:46.291299', 'step': 7091, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:46.324030', 'step': 7091, 'epoch': 1} {'type': 'loss', 'content': 0.10387884080410004, 'timestamp': '2025-09-30 22:20:46.347732', 'step': 7092, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:46.406230', 'step': 7092, 'epoch': 1} {'type': 'loss', 'content': 0.10209205001592636, 'timestamp': '2025-09-30 22:20:46.408460', 'step': 7093, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:46.439849', 'step': 7093, 'epoch': 1} {'type': 'loss', 'content': 0.08990873396396637, 'timestamp': '2025-09-30 22:20:46.442209', 'step': 7094, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.473674', 'step': 7094, 'epoch': 1} {'type': 'loss', 'content': 0.20918649435043335, 'timestamp': '2025-09-30 22:20:46.476142', 'step': 7095, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:46.509441', 'step': 7095, 'epoch': 1} {'type': 'loss', 'content': 0.14705513417720795, 'timestamp': '2025-09-30 22:20:46.533161', 'step': 7096, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.563669', 'step': 7096, 'epoch': 1} {'type': 'loss', 'content': 0.12122863531112671, 'timestamp': '2025-09-30 22:20:46.566733', 'step': 7097, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:46.599157', 'step': 7097, 'epoch': 1} {'type': 'loss', 'content': 0.2057342529296875, 'timestamp': '2025-09-30 22:20:46.601514', 'step': 7098, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:46.632249', 'step': 7098, 'epoch': 1} {'type': 'loss', 'content': 0.11596887558698654, 'timestamp': '2025-09-30 22:20:46.635949', 'step': 7099, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:46.670598', 'step': 7099, 'epoch': 1} {'type': 'loss', 'content': 0.17462319135665894, 'timestamp': '2025-09-30 22:20:46.697852', 'step': 7100, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.729017', 'step': 7100, 'epoch': 1} {'type': 'loss', 'content': 0.15770454704761505, 'timestamp': '2025-09-30 22:20:46.732327', 'step': 7101, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:46.764499', 'step': 7101, 'epoch': 1} {'type': 'loss', 'content': 0.2010323852300644, 'timestamp': '2025-09-30 22:20:46.768889', 'step': 7102, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.800602', 'step': 7102, 'epoch': 1} {'type': 'loss', 'content': 0.1391851156949997, 'timestamp': '2025-09-30 22:20:46.803859', 'step': 7103, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:46.835332', 'step': 7103, 'epoch': 1} {'type': 'loss', 'content': 0.18891748785972595, 'timestamp': '2025-09-30 22:20:46.859856', 'step': 7104, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:46.891105', 'step': 7104, 'epoch': 1} {'type': 'loss', 'content': 0.09437505155801773, 'timestamp': '2025-09-30 22:20:46.895527', 'step': 7105, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:46.928223', 'step': 7105, 'epoch': 1} {'type': 'loss', 'content': 0.17117244005203247, 'timestamp': '2025-09-30 22:20:46.931677', 'step': 7106, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:46.963915', 'step': 7106, 'epoch': 1} {'type': 'loss', 'content': 0.10461164265871048, 'timestamp': '2025-09-30 22:20:46.966704', 'step': 7107, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:47.000501', 'step': 7107, 'epoch': 1} {'type': 'loss', 'content': 0.17515911161899567, 'timestamp': '2025-09-30 22:20:47.025953', 'step': 7108, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.057553', 'step': 7108, 'epoch': 1} {'type': 'loss', 'content': 0.0861017256975174, 'timestamp': '2025-09-30 22:20:47.060444', 'step': 7109, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:47.096677', 'step': 7109, 'epoch': 1} {'type': 'loss', 'content': 0.1266872137784958, 'timestamp': '2025-09-30 22:20:47.100228', 'step': 7110, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:47.132051', 'step': 7110, 'epoch': 1} {'type': 'loss', 'content': 0.1386774182319641, 'timestamp': '2025-09-30 22:20:47.135058', 'step': 7111, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.166786', 'step': 7111, 'epoch': 1} {'type': 'loss', 'content': 0.20877774059772491, 'timestamp': '2025-09-30 22:20:47.191060', 'step': 7112, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:47.222463', 'step': 7112, 'epoch': 1} {'type': 'loss', 'content': 0.14053918421268463, 'timestamp': '2025-09-30 22:20:47.227328', 'step': 7113, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:47.260391', 'step': 7113, 'epoch': 1} {'type': 'loss', 'content': 0.08398082852363586, 'timestamp': '2025-09-30 22:20:47.264561', 'step': 7114, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:47.297117', 'step': 7114, 'epoch': 1} {'type': 'loss', 'content': 0.16416305303573608, 'timestamp': '2025-09-30 22:20:47.302133', 'step': 7115, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.335995', 'step': 7115, 'epoch': 1} {'type': 'loss', 'content': 0.09224093705415726, 'timestamp': '2025-09-30 22:20:47.360855', 'step': 7116, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.392803', 'step': 7116, 'epoch': 1} {'type': 'loss', 'content': 0.18988394737243652, 'timestamp': '2025-09-30 22:20:47.397377', 'step': 7117, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.429056', 'step': 7117, 'epoch': 1} {'type': 'loss', 'content': 0.2483605444431305, 'timestamp': '2025-09-30 22:20:47.431897', 'step': 7118, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:47.463925', 'step': 7118, 'epoch': 1} {'type': 'loss', 'content': 0.09527155756950378, 'timestamp': '2025-09-30 22:20:47.467347', 'step': 7119, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.498379', 'step': 7119, 'epoch': 1} {'type': 'loss', 'content': 0.10466920584440231, 'timestamp': '2025-09-30 22:20:47.522835', 'step': 7120, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:47.554408', 'step': 7120, 'epoch': 1} {'type': 'loss', 'content': 0.1768846958875656, 'timestamp': '2025-09-30 22:20:47.557467', 'step': 7121, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.588432', 'step': 7121, 'epoch': 1} {'type': 'loss', 'content': 0.11730902642011642, 'timestamp': '2025-09-30 22:20:47.591541', 'step': 7122, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.623439', 'step': 7122, 'epoch': 1} {'type': 'loss', 'content': 0.14245906472206116, 'timestamp': '2025-09-30 22:20:47.628331', 'step': 7123, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.658999', 'step': 7123, 'epoch': 1} {'type': 'loss', 'content': 0.11037369072437286, 'timestamp': '2025-09-30 22:20:47.683467', 'step': 7124, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.713627', 'step': 7124, 'epoch': 1} {'type': 'loss', 'content': 0.09256312996149063, 'timestamp': '2025-09-30 22:20:47.716942', 'step': 7125, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.747783', 'step': 7125, 'epoch': 1} {'type': 'loss', 'content': 0.10087192058563232, 'timestamp': '2025-09-30 22:20:47.750848', 'step': 7126, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.782412', 'step': 7126, 'epoch': 1} {'type': 'loss', 'content': 0.08433625847101212, 'timestamp': '2025-09-30 22:20:47.785433', 'step': 7127, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:47.816755', 'step': 7127, 'epoch': 1} {'type': 'loss', 'content': 0.2250516563653946, 'timestamp': '2025-09-30 22:20:47.841351', 'step': 7128, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:47.872159', 'step': 7128, 'epoch': 1} {'type': 'loss', 'content': 0.15609034895896912, 'timestamp': '2025-09-30 22:20:47.874830', 'step': 7129, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.905682', 'step': 7129, 'epoch': 1} {'type': 'loss', 'content': 0.20466695725917816, 'timestamp': '2025-09-30 22:20:47.909510', 'step': 7130, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.940445', 'step': 7130, 'epoch': 1} {'type': 'loss', 'content': 0.11400061100721359, 'timestamp': '2025-09-30 22:20:47.943242', 'step': 7131, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:47.974468', 'step': 7131, 'epoch': 1} {'type': 'loss', 'content': 0.17851680517196655, 'timestamp': '2025-09-30 22:20:48.000283', 'step': 7132, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.031756', 'step': 7132, 'epoch': 1} {'type': 'loss', 'content': 0.13395652174949646, 'timestamp': '2025-09-30 22:20:48.034928', 'step': 7133, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:48.066033', 'step': 7133, 'epoch': 1} {'type': 'loss', 'content': 0.10698513686656952, 'timestamp': '2025-09-30 22:20:48.069271', 'step': 7134, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:48.099978', 'step': 7134, 'epoch': 1} {'type': 'loss', 'content': 0.1684715747833252, 'timestamp': '2025-09-30 22:20:48.103086', 'step': 7135, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.133902', 'step': 7135, 'epoch': 1} {'type': 'loss', 'content': 0.15337073802947998, 'timestamp': '2025-09-30 22:20:48.158167', 'step': 7136, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.188885', 'step': 7136, 'epoch': 1} {'type': 'loss', 'content': 0.10100022703409195, 'timestamp': '2025-09-30 22:20:48.191234', 'step': 7137, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:48.223525', 'step': 7137, 'epoch': 1} {'type': 'loss', 'content': 0.09203266352415085, 'timestamp': '2025-09-30 22:20:48.226339', 'step': 7138, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:48.256936', 'step': 7138, 'epoch': 1} {'type': 'loss', 'content': 0.06264710426330566, 'timestamp': '2025-09-30 22:20:48.259427', 'step': 7139, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.291289', 'step': 7139, 'epoch': 1} {'type': 'loss', 'content': 0.11329535394906998, 'timestamp': '2025-09-30 22:20:48.316641', 'step': 7140, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:48.346672', 'step': 7140, 'epoch': 1} {'type': 'loss', 'content': 0.10179013013839722, 'timestamp': '2025-09-30 22:20:48.350514', 'step': 7141, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.380737', 'step': 7141, 'epoch': 1} {'type': 'loss', 'content': 0.09659246355295181, 'timestamp': '2025-09-30 22:20:48.383049', 'step': 7142, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:48.413317', 'step': 7142, 'epoch': 1} {'type': 'loss', 'content': 0.15575477480888367, 'timestamp': '2025-09-30 22:20:48.415692', 'step': 7143, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:48.445530', 'step': 7143, 'epoch': 1} {'type': 'loss', 'content': 0.1548420637845993, 'timestamp': '2025-09-30 22:20:48.469339', 'step': 7144, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:48.499153', 'step': 7144, 'epoch': 1} {'type': 'loss', 'content': 0.08205954730510712, 'timestamp': '2025-09-30 22:20:48.501374', 'step': 7145, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:48.532820', 'step': 7145, 'epoch': 1} {'type': 'loss', 'content': 0.16155703365802765, 'timestamp': '2025-09-30 22:20:48.535423', 'step': 7146, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:48.565943', 'step': 7146, 'epoch': 1} {'type': 'loss', 'content': 0.0963076576590538, 'timestamp': '2025-09-30 22:20:48.568543', 'step': 7147, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.598679', 'step': 7147, 'epoch': 1} {'type': 'loss', 'content': 0.10404814034700394, 'timestamp': '2025-09-30 22:20:48.622695', 'step': 7148, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:48.654417', 'step': 7148, 'epoch': 1} {'type': 'loss', 'content': 0.1742212027311325, 'timestamp': '2025-09-30 22:20:48.656767', 'step': 7149, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.687267', 'step': 7149, 'epoch': 1} {'type': 'loss', 'content': 0.13380314409732819, 'timestamp': '2025-09-30 22:20:48.689572', 'step': 7150, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.719375', 'step': 7150, 'epoch': 1} {'type': 'loss', 'content': 0.1121576800942421, 'timestamp': '2025-09-30 22:20:48.721575', 'step': 7151, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:48.751856', 'step': 7151, 'epoch': 1} {'type': 'loss', 'content': 0.23407235741615295, 'timestamp': '2025-09-30 22:20:48.777216', 'step': 7152, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.806848', 'step': 7152, 'epoch': 1} {'type': 'loss', 'content': 0.10355640202760696, 'timestamp': '2025-09-30 22:20:48.809122', 'step': 7153, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:48.839923', 'step': 7153, 'epoch': 1} {'type': 'loss', 'content': 0.1194060817360878, 'timestamp': '2025-09-30 22:20:48.842493', 'step': 7154, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:48.872471', 'step': 7154, 'epoch': 1} {'type': 'loss', 'content': 0.11410222202539444, 'timestamp': '2025-09-30 22:20:48.876111', 'step': 7155, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:48.907000', 'step': 7155, 'epoch': 1} {'type': 'loss', 'content': 0.12132114171981812, 'timestamp': '2025-09-30 22:20:48.933958', 'step': 7156, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:48.964264', 'step': 7156, 'epoch': 1} {'type': 'loss', 'content': 0.12877534329891205, 'timestamp': '2025-09-30 22:20:48.966926', 'step': 7157, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:20:48.997327', 'step': 7157, 'epoch': 1} {'type': 'loss', 'content': 0.16682827472686768, 'timestamp': '2025-09-30 22:20:49.002080', 'step': 7158, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:49.032264', 'step': 7158, 'epoch': 1} {'type': 'loss', 'content': 0.20245026051998138, 'timestamp': '2025-09-30 22:20:49.034780', 'step': 7159, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:49.064539', 'step': 7159, 'epoch': 1} {'type': 'loss', 'content': 0.09210885316133499, 'timestamp': '2025-09-30 22:20:49.088403', 'step': 7160, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:49.118530', 'step': 7160, 'epoch': 1} {'type': 'loss', 'content': 0.2565858066082001, 'timestamp': '2025-09-30 22:20:49.120868', 'step': 7161, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.151110', 'step': 7161, 'epoch': 1} {'type': 'loss', 'content': 0.1558564305305481, 'timestamp': '2025-09-30 22:20:49.154979', 'step': 7162, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:49.186475', 'step': 7162, 'epoch': 1} {'type': 'loss', 'content': 0.1791388988494873, 'timestamp': '2025-09-30 22:20:49.188979', 'step': 7163, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.219282', 'step': 7163, 'epoch': 1} {'type': 'loss', 'content': 0.13941700756549835, 'timestamp': '2025-09-30 22:20:49.244531', 'step': 7164, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.276034', 'step': 7164, 'epoch': 1} {'type': 'loss', 'content': 0.15586739778518677, 'timestamp': '2025-09-30 22:20:49.279747', 'step': 7165, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.311586', 'step': 7165, 'epoch': 1} {'type': 'loss', 'content': 0.10957347601652145, 'timestamp': '2025-09-30 22:20:49.314428', 'step': 7166, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.344158', 'step': 7166, 'epoch': 1} {'type': 'loss', 'content': 0.248260959982872, 'timestamp': '2025-09-30 22:20:49.346869', 'step': 7167, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:49.382935', 'step': 7167, 'epoch': 1} {'type': 'loss', 'content': 0.10443779081106186, 'timestamp': '2025-09-30 22:20:49.406864', 'step': 7168, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:49.438111', 'step': 7168, 'epoch': 1} {'type': 'loss', 'content': 0.19341786205768585, 'timestamp': '2025-09-30 22:20:49.440755', 'step': 7169, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:49.471861', 'step': 7169, 'epoch': 1} {'type': 'loss', 'content': 0.10949484258890152, 'timestamp': '2025-09-30 22:20:49.474199', 'step': 7170, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.504411', 'step': 7170, 'epoch': 1} {'type': 'loss', 'content': 0.136592835187912, 'timestamp': '2025-09-30 22:20:49.506602', 'step': 7171, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.536604', 'step': 7171, 'epoch': 1} {'type': 'loss', 'content': 0.1666136533021927, 'timestamp': '2025-09-30 22:20:49.560383', 'step': 7172, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.590542', 'step': 7172, 'epoch': 1} {'type': 'loss', 'content': 0.15716087818145752, 'timestamp': '2025-09-30 22:20:49.593461', 'step': 7173, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:49.623658', 'step': 7173, 'epoch': 1} {'type': 'loss', 'content': 0.12449963390827179, 'timestamp': '2025-09-30 22:20:49.625944', 'step': 7174, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.656289', 'step': 7174, 'epoch': 1} {'type': 'loss', 'content': 0.13138480484485626, 'timestamp': '2025-09-30 22:20:49.658646', 'step': 7175, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.693824', 'step': 7175, 'epoch': 1} {'type': 'loss', 'content': 0.15561431646347046, 'timestamp': '2025-09-30 22:20:49.718097', 'step': 7176, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.747999', 'step': 7176, 'epoch': 1} {'type': 'loss', 'content': 0.19896848499774933, 'timestamp': '2025-09-30 22:20:49.750319', 'step': 7177, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.781000', 'step': 7177, 'epoch': 1} {'type': 'loss', 'content': 0.09437058866024017, 'timestamp': '2025-09-30 22:20:49.783322', 'step': 7178, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.824444', 'step': 7178, 'epoch': 1} {'type': 'loss', 'content': 0.07895030826330185, 'timestamp': '2025-09-30 22:20:49.826628', 'step': 7179, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:49.856700', 'step': 7179, 'epoch': 1} {'type': 'loss', 'content': 0.14354753494262695, 'timestamp': '2025-09-30 22:20:49.882446', 'step': 7180, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.913105', 'step': 7180, 'epoch': 1} {'type': 'loss', 'content': 0.23219887912273407, 'timestamp': '2025-09-30 22:20:49.915567', 'step': 7181, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:49.946314', 'step': 7181, 'epoch': 1} {'type': 'loss', 'content': 0.13645108044147491, 'timestamp': '2025-09-30 22:20:49.948593', 'step': 7182, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:49.978781', 'step': 7182, 'epoch': 1} {'type': 'loss', 'content': 0.14765004813671112, 'timestamp': '2025-09-30 22:20:49.981165', 'step': 7183, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:50.011744', 'step': 7183, 'epoch': 1} {'type': 'loss', 'content': 0.2021426260471344, 'timestamp': '2025-09-30 22:20:50.040191', 'step': 7184, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.078570', 'step': 7184, 'epoch': 1} {'type': 'loss', 'content': 0.1845070719718933, 'timestamp': '2025-09-30 22:20:50.084236', 'step': 7185, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.114068', 'step': 7185, 'epoch': 1} {'type': 'loss', 'content': 0.09940873831510544, 'timestamp': '2025-09-30 22:20:50.119545', 'step': 7186, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.149796', 'step': 7186, 'epoch': 1} {'type': 'loss', 'content': 0.1834196299314499, 'timestamp': '2025-09-30 22:20:50.152336', 'step': 7187, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.182868', 'step': 7187, 'epoch': 1} {'type': 'loss', 'content': 0.14432695508003235, 'timestamp': '2025-09-30 22:20:50.206782', 'step': 7188, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:50.242489', 'step': 7188, 'epoch': 1} {'type': 'loss', 'content': 0.22593429684638977, 'timestamp': '2025-09-30 22:20:50.244824', 'step': 7189, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:50.275667', 'step': 7189, 'epoch': 1} {'type': 'loss', 'content': 0.11461656540632248, 'timestamp': '2025-09-30 22:20:50.278648', 'step': 7190, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.309993', 'step': 7190, 'epoch': 1} {'type': 'loss', 'content': 0.1601867377758026, 'timestamp': '2025-09-30 22:20:50.314946', 'step': 7191, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.345066', 'step': 7191, 'epoch': 1} {'type': 'loss', 'content': 0.19897550344467163, 'timestamp': '2025-09-30 22:20:50.368918', 'step': 7192, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:50.398913', 'step': 7192, 'epoch': 1} {'type': 'loss', 'content': 0.17759183049201965, 'timestamp': '2025-09-30 22:20:50.404262', 'step': 7193, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.436256', 'step': 7193, 'epoch': 1} {'type': 'loss', 'content': 0.1553102433681488, 'timestamp': '2025-09-30 22:20:50.440403', 'step': 7194, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:50.472798', 'step': 7194, 'epoch': 1} {'type': 'loss', 'content': 0.18232864141464233, 'timestamp': '2025-09-30 22:20:50.477338', 'step': 7195, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:50.510102', 'step': 7195, 'epoch': 1} {'type': 'loss', 'content': 0.1640712469816208, 'timestamp': '2025-09-30 22:20:50.535363', 'step': 7196, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:50.565382', 'step': 7196, 'epoch': 1} {'type': 'loss', 'content': 0.20879165828227997, 'timestamp': '2025-09-30 22:20:50.569865', 'step': 7197, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.600368', 'step': 7197, 'epoch': 1} {'type': 'loss', 'content': 0.06769507378339767, 'timestamp': '2025-09-30 22:20:50.605979', 'step': 7198, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:50.640731', 'step': 7198, 'epoch': 1} {'type': 'loss', 'content': 0.14756259322166443, 'timestamp': '2025-09-30 22:20:50.648963', 'step': 7199, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:50.685599', 'step': 7199, 'epoch': 1} {'type': 'loss', 'content': 0.12482746690511703, 'timestamp': '2025-09-30 22:20:50.715381', 'step': 7200, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:50.753291', 'step': 7200, 'epoch': 1} {'type': 'loss', 'content': 0.0807347446680069, 'timestamp': '2025-09-30 22:20:50.760823', 'step': 7201, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:50.796026', 'step': 7201, 'epoch': 1} {'type': 'loss', 'content': 0.14066404104232788, 'timestamp': '2025-09-30 22:20:50.798650', 'step': 7202, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:50.829999', 'step': 7202, 'epoch': 1} {'type': 'loss', 'content': 0.23673605918884277, 'timestamp': '2025-09-30 22:20:50.838682', 'step': 7203, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:50.880237', 'step': 7203, 'epoch': 1} {'type': 'loss', 'content': 0.1780785471200943, 'timestamp': '2025-09-30 22:20:50.917857', 'step': 7204, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:50.950084', 'step': 7204, 'epoch': 1} {'type': 'loss', 'content': 0.23538900911808014, 'timestamp': '2025-09-30 22:20:50.958776', 'step': 7205, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:50.993654', 'step': 7205, 'epoch': 1} {'type': 'loss', 'content': 0.11661763489246368, 'timestamp': '2025-09-30 22:20:51.002684', 'step': 7206, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.038296', 'step': 7206, 'epoch': 1} {'type': 'loss', 'content': 0.20733793079853058, 'timestamp': '2025-09-30 22:20:51.047392', 'step': 7207, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.089267', 'step': 7207, 'epoch': 1} {'type': 'loss', 'content': 0.1521531194448471, 'timestamp': '2025-09-30 22:20:51.119879', 'step': 7208, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:51.151550', 'step': 7208, 'epoch': 1} {'type': 'loss', 'content': 0.15410833060741425, 'timestamp': '2025-09-30 22:20:51.162507', 'step': 7209, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:51.198335', 'step': 7209, 'epoch': 1} {'type': 'loss', 'content': 0.12287496775388718, 'timestamp': '2025-09-30 22:20:51.202872', 'step': 7210, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.243827', 'step': 7210, 'epoch': 1} {'type': 'loss', 'content': 0.08620941638946533, 'timestamp': '2025-09-30 22:20:51.247506', 'step': 7211, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:51.283455', 'step': 7211, 'epoch': 1} {'type': 'loss', 'content': 0.11853145062923431, 'timestamp': '2025-09-30 22:20:51.316141', 'step': 7212, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.354450', 'step': 7212, 'epoch': 1} {'type': 'loss', 'content': 0.13856564462184906, 'timestamp': '2025-09-30 22:20:51.357382', 'step': 7213, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:51.389911', 'step': 7213, 'epoch': 1} {'type': 'loss', 'content': 0.1749529093503952, 'timestamp': '2025-09-30 22:20:51.400049', 'step': 7214, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.438904', 'step': 7214, 'epoch': 1} {'type': 'loss', 'content': 0.11720874905586243, 'timestamp': '2025-09-30 22:20:51.450279', 'step': 7215, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.488038', 'step': 7215, 'epoch': 1} {'type': 'loss', 'content': 0.21598635613918304, 'timestamp': '2025-09-30 22:20:51.518330', 'step': 7216, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:51.557261', 'step': 7216, 'epoch': 1} {'type': 'loss', 'content': 0.16744676232337952, 'timestamp': '2025-09-30 22:20:51.566374', 'step': 7217, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.599577', 'step': 7217, 'epoch': 1} {'type': 'loss', 'content': 0.08546007424592972, 'timestamp': '2025-09-30 22:20:51.610851', 'step': 7218, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:51.646717', 'step': 7218, 'epoch': 1} {'type': 'loss', 'content': 0.0857166275382042, 'timestamp': '2025-09-30 22:20:51.660006', 'step': 7219, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:51.699224', 'step': 7219, 'epoch': 1} {'type': 'loss', 'content': 0.14766350388526917, 'timestamp': '2025-09-30 22:20:51.731158', 'step': 7220, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:51.771398', 'step': 7220, 'epoch': 1} {'type': 'loss', 'content': 0.13676412403583527, 'timestamp': '2025-09-30 22:20:51.780815', 'step': 7221, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:51.819391', 'step': 7221, 'epoch': 1} {'type': 'loss', 'content': 0.12353532761335373, 'timestamp': '2025-09-30 22:20:51.822761', 'step': 7222, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:51.858166', 'step': 7222, 'epoch': 1} {'type': 'loss', 'content': 0.1569376438856125, 'timestamp': '2025-09-30 22:20:51.867757', 'step': 7223, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:51.904847', 'step': 7223, 'epoch': 1} {'type': 'loss', 'content': 0.1142135038971901, 'timestamp': '2025-09-30 22:20:51.929490', 'step': 7224, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:51.960073', 'step': 7224, 'epoch': 1} {'type': 'loss', 'content': 0.21149607002735138, 'timestamp': '2025-09-30 22:20:51.967478', 'step': 7225, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:52.009154', 'step': 7225, 'epoch': 1} {'type': 'loss', 'content': 0.14922133088111877, 'timestamp': '2025-09-30 22:20:52.012316', 'step': 7226, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:52.044510', 'step': 7226, 'epoch': 1} {'type': 'loss', 'content': 0.11910325288772583, 'timestamp': '2025-09-30 22:20:52.048881', 'step': 7227, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.086624', 'step': 7227, 'epoch': 1} {'type': 'loss', 'content': 0.0944121852517128, 'timestamp': '2025-09-30 22:20:52.116824', 'step': 7228, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:52.148978', 'step': 7228, 'epoch': 1} {'type': 'loss', 'content': 0.1686822921037674, 'timestamp': '2025-09-30 22:20:52.152226', 'step': 7229, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:52.185491', 'step': 7229, 'epoch': 1} {'type': 'loss', 'content': 0.10811001062393188, 'timestamp': '2025-09-30 22:20:52.188525', 'step': 7230, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.219873', 'step': 7230, 'epoch': 1} {'type': 'loss', 'content': 0.17631228268146515, 'timestamp': '2025-09-30 22:20:52.229694', 'step': 7231, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.260647', 'step': 7231, 'epoch': 1} {'type': 'loss', 'content': 0.09750421345233917, 'timestamp': '2025-09-30 22:20:52.291314', 'step': 7232, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.330343', 'step': 7232, 'epoch': 1} {'type': 'loss', 'content': 0.106832355260849, 'timestamp': '2025-09-30 22:20:52.340936', 'step': 7233, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.387988', 'step': 7233, 'epoch': 1} {'type': 'loss', 'content': 0.09297266602516174, 'timestamp': '2025-09-30 22:20:52.399006', 'step': 7234, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.438078', 'step': 7234, 'epoch': 1} {'type': 'loss', 'content': 0.06523825228214264, 'timestamp': '2025-09-30 22:20:52.448619', 'step': 7235, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:52.485211', 'step': 7235, 'epoch': 1} {'type': 'loss', 'content': 0.08939672261476517, 'timestamp': '2025-09-30 22:20:52.512196', 'step': 7236, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.552532', 'step': 7236, 'epoch': 1} {'type': 'loss', 'content': 0.11777807027101517, 'timestamp': '2025-09-30 22:20:52.563233', 'step': 7237, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.606863', 'step': 7237, 'epoch': 1} {'type': 'loss', 'content': 0.07352698594331741, 'timestamp': '2025-09-30 22:20:52.618538', 'step': 7238, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:52.658354', 'step': 7238, 'epoch': 1} {'type': 'loss', 'content': 0.12560723721981049, 'timestamp': '2025-09-30 22:20:52.668483', 'step': 7239, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:52.705072', 'step': 7239, 'epoch': 1} {'type': 'loss', 'content': 0.23215709626674652, 'timestamp': '2025-09-30 22:20:52.730265', 'step': 7240, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:52.761707', 'step': 7240, 'epoch': 1} {'type': 'loss', 'content': 0.11113770306110382, 'timestamp': '2025-09-30 22:20:52.766074', 'step': 7241, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:52.803996', 'step': 7241, 'epoch': 1} {'type': 'loss', 'content': 0.06607978790998459, 'timestamp': '2025-09-30 22:20:52.813210', 'step': 7242, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:52.853018', 'step': 7242, 'epoch': 1} {'type': 'loss', 'content': 0.21603228151798248, 'timestamp': '2025-09-30 22:20:52.861409', 'step': 7243, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:52.897029', 'step': 7243, 'epoch': 1} {'type': 'loss', 'content': 0.11396213620901108, 'timestamp': '2025-09-30 22:20:52.921496', 'step': 7244, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:52.965907', 'step': 7244, 'epoch': 1} {'type': 'loss', 'content': 0.18599554896354675, 'timestamp': '2025-09-30 22:20:52.975094', 'step': 7245, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.024394', 'step': 7245, 'epoch': 1} {'type': 'loss', 'content': 0.1473665088415146, 'timestamp': '2025-09-30 22:20:53.034965', 'step': 7246, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.073512', 'step': 7246, 'epoch': 1} {'type': 'loss', 'content': 0.09436960518360138, 'timestamp': '2025-09-30 22:20:53.084610', 'step': 7247, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.122537', 'step': 7247, 'epoch': 1} {'type': 'loss', 'content': 0.18358208239078522, 'timestamp': '2025-09-30 22:20:53.152854', 'step': 7248, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:53.191449', 'step': 7248, 'epoch': 1} {'type': 'loss', 'content': 0.09812121838331223, 'timestamp': '2025-09-30 22:20:53.195542', 'step': 7249, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.226433', 'step': 7249, 'epoch': 1} {'type': 'loss', 'content': 0.11158431321382523, 'timestamp': '2025-09-30 22:20:53.230252', 'step': 7250, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.261800', 'step': 7250, 'epoch': 1} {'type': 'loss', 'content': 0.1173638105392456, 'timestamp': '2025-09-30 22:20:53.273371', 'step': 7251, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.311285', 'step': 7251, 'epoch': 1} {'type': 'loss', 'content': 0.10190834850072861, 'timestamp': '2025-09-30 22:20:53.345310', 'step': 7252, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.384710', 'step': 7252, 'epoch': 1} {'type': 'loss', 'content': 0.20689406991004944, 'timestamp': '2025-09-30 22:20:53.389175', 'step': 7253, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:53.420519', 'step': 7253, 'epoch': 1} {'type': 'loss', 'content': 0.12775716185569763, 'timestamp': '2025-09-30 22:20:53.439998', 'step': 7254, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.485813', 'step': 7254, 'epoch': 1} {'type': 'loss', 'content': 0.09802455455064774, 'timestamp': '2025-09-30 22:20:53.506345', 'step': 7255, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.553049', 'step': 7255, 'epoch': 1} {'type': 'loss', 'content': 0.14503805339336395, 'timestamp': '2025-09-30 22:20:53.595486', 'step': 7256, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:53.627849', 'step': 7256, 'epoch': 1} {'type': 'loss', 'content': 0.13472960889339447, 'timestamp': '2025-09-30 22:20:53.648326', 'step': 7257, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.695075', 'step': 7257, 'epoch': 1} {'type': 'loss', 'content': 0.1620021015405655, 'timestamp': '2025-09-30 22:20:53.703783', 'step': 7258, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:53.746705', 'step': 7258, 'epoch': 1} {'type': 'loss', 'content': 0.1683727651834488, 'timestamp': '2025-09-30 22:20:53.756813', 'step': 7259, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.796857', 'step': 7259, 'epoch': 1} {'type': 'loss', 'content': 0.23391813039779663, 'timestamp': '2025-09-30 22:20:53.825528', 'step': 7260, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:53.855567', 'step': 7260, 'epoch': 1} {'type': 'loss', 'content': 0.2621281147003174, 'timestamp': '2025-09-30 22:20:53.864054', 'step': 7261, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:53.900490', 'step': 7261, 'epoch': 1} {'type': 'loss', 'content': 0.10332009196281433, 'timestamp': '2025-09-30 22:20:53.908945', 'step': 7262, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.946809', 'step': 7262, 'epoch': 1} {'type': 'loss', 'content': 0.13114650547504425, 'timestamp': '2025-09-30 22:20:53.956514', 'step': 7263, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:53.999583', 'step': 7263, 'epoch': 1} {'type': 'loss', 'content': 0.10936779528856277, 'timestamp': '2025-09-30 22:20:54.023595', 'step': 7264, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.066175', 'step': 7264, 'epoch': 1} {'type': 'loss', 'content': 0.20027844607830048, 'timestamp': '2025-09-30 22:20:54.073230', 'step': 7265, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:54.110705', 'step': 7265, 'epoch': 1} {'type': 'loss', 'content': 0.14348724484443665, 'timestamp': '2025-09-30 22:20:54.117514', 'step': 7266, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.151150', 'step': 7266, 'epoch': 1} {'type': 'loss', 'content': 0.054387278854846954, 'timestamp': '2025-09-30 22:20:54.156714', 'step': 7267, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.190316', 'step': 7267, 'epoch': 1} {'type': 'loss', 'content': 0.14110203087329865, 'timestamp': '2025-09-30 22:20:54.218830', 'step': 7268, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.253016', 'step': 7268, 'epoch': 1} {'type': 'loss', 'content': 0.22383806109428406, 'timestamp': '2025-09-30 22:20:54.256556', 'step': 7269, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:54.287741', 'step': 7269, 'epoch': 1} {'type': 'loss', 'content': 0.17819266021251678, 'timestamp': '2025-09-30 22:20:54.302084', 'step': 7270, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:54.342437', 'step': 7270, 'epoch': 1} {'type': 'loss', 'content': 0.1990060806274414, 'timestamp': '2025-09-30 22:20:54.353789', 'step': 7271, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:54.385177', 'step': 7271, 'epoch': 1} {'type': 'loss', 'content': 0.1495240479707718, 'timestamp': '2025-09-30 22:20:54.413881', 'step': 7272, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:54.453327', 'step': 7272, 'epoch': 1} {'type': 'loss', 'content': 0.15352587401866913, 'timestamp': '2025-09-30 22:20:54.459688', 'step': 7273, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:54.495596', 'step': 7273, 'epoch': 1} {'type': 'loss', 'content': 0.14307208359241486, 'timestamp': '2025-09-30 22:20:54.504291', 'step': 7274, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.540351', 'step': 7274, 'epoch': 1} {'type': 'loss', 'content': 0.07160203158855438, 'timestamp': '2025-09-30 22:20:54.547619', 'step': 7275, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:54.584645', 'step': 7275, 'epoch': 1} {'type': 'loss', 'content': 0.1844187080860138, 'timestamp': '2025-09-30 22:20:54.615908', 'step': 7276, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.654396', 'step': 7276, 'epoch': 1} {'type': 'loss', 'content': 0.0926065519452095, 'timestamp': '2025-09-30 22:20:54.661026', 'step': 7277, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:54.696425', 'step': 7277, 'epoch': 1} {'type': 'loss', 'content': 0.22819627821445465, 'timestamp': '2025-09-30 22:20:54.699552', 'step': 7278, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:54.736050', 'step': 7278, 'epoch': 1} {'type': 'loss', 'content': 0.1143067330121994, 'timestamp': '2025-09-30 22:20:54.741678', 'step': 7279, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:54.775381', 'step': 7279, 'epoch': 1} {'type': 'loss', 'content': 0.18776294589042664, 'timestamp': '2025-09-30 22:20:54.800348', 'step': 7280, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.836189', 'step': 7280, 'epoch': 1} {'type': 'loss', 'content': 0.26972636580467224, 'timestamp': '2025-09-30 22:20:54.846626', 'step': 7281, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:54.884336', 'step': 7281, 'epoch': 1} {'type': 'loss', 'content': 0.11320476979017258, 'timestamp': '2025-09-30 22:20:54.887039', 'step': 7282, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.925720', 'step': 7282, 'epoch': 1} {'type': 'loss', 'content': 0.14503873884677887, 'timestamp': '2025-09-30 22:20:54.929136', 'step': 7283, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:54.963197', 'step': 7283, 'epoch': 1} {'type': 'loss', 'content': 0.041885826736688614, 'timestamp': '2025-09-30 22:20:54.996255', 'step': 7284, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:55.030600', 'step': 7284, 'epoch': 1} {'type': 'loss', 'content': 0.06890028715133667, 'timestamp': '2025-09-30 22:20:55.033295', 'step': 7285, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:55.066398', 'step': 7285, 'epoch': 1} {'type': 'loss', 'content': 0.13695783913135529, 'timestamp': '2025-09-30 22:20:55.070170', 'step': 7286, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:55.115474', 'step': 7286, 'epoch': 1} {'type': 'loss', 'content': 0.09854838997125626, 'timestamp': '2025-09-30 22:20:55.118659', 'step': 7287, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.150112', 'step': 7287, 'epoch': 1} {'type': 'loss', 'content': 0.12969090044498444, 'timestamp': '2025-09-30 22:20:55.181383', 'step': 7288, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:55.220032', 'step': 7288, 'epoch': 1} {'type': 'loss', 'content': 0.08256148546934128, 'timestamp': '2025-09-30 22:20:55.223415', 'step': 7289, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.261699', 'step': 7289, 'epoch': 1} {'type': 'loss', 'content': 0.15784373879432678, 'timestamp': '2025-09-30 22:20:55.273206', 'step': 7290, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:55.313325', 'step': 7290, 'epoch': 1} {'type': 'loss', 'content': 0.12959560751914978, 'timestamp': '2025-09-30 22:20:55.322771', 'step': 7291, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.364162', 'step': 7291, 'epoch': 1} {'type': 'loss', 'content': 0.17029716074466705, 'timestamp': '2025-09-30 22:20:55.395219', 'step': 7292, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.431467', 'step': 7292, 'epoch': 1} {'type': 'loss', 'content': 0.15507443249225616, 'timestamp': '2025-09-30 22:20:55.440762', 'step': 7293, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.473539', 'step': 7293, 'epoch': 1} {'type': 'loss', 'content': 0.12932224571704865, 'timestamp': '2025-09-30 22:20:55.481586', 'step': 7294, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.518128', 'step': 7294, 'epoch': 1} {'type': 'loss', 'content': 0.14945088326931, 'timestamp': '2025-09-30 22:20:55.526329', 'step': 7295, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.562148', 'step': 7295, 'epoch': 1} {'type': 'loss', 'content': 0.06809260696172714, 'timestamp': '2025-09-30 22:20:55.592500', 'step': 7296, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:55.632440', 'step': 7296, 'epoch': 1} {'type': 'loss', 'content': 0.14203448593616486, 'timestamp': '2025-09-30 22:20:55.643546', 'step': 7297, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.680096', 'step': 7297, 'epoch': 1} {'type': 'loss', 'content': 0.15271519124507904, 'timestamp': '2025-09-30 22:20:55.687984', 'step': 7298, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.727159', 'step': 7298, 'epoch': 1} {'type': 'loss', 'content': 0.08757337182760239, 'timestamp': '2025-09-30 22:20:55.734400', 'step': 7299, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:55.771589', 'step': 7299, 'epoch': 1} {'type': 'loss', 'content': 0.11801581084728241, 'timestamp': '2025-09-30 22:20:55.802080', 'step': 7300, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:55.833893', 'step': 7300, 'epoch': 1} {'type': 'loss', 'content': 0.16974695026874542, 'timestamp': '2025-09-30 22:20:55.850906', 'step': 7301, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:55.897325', 'step': 7301, 'epoch': 1} {'type': 'loss', 'content': 0.15748602151870728, 'timestamp': '2025-09-30 22:20:55.906370', 'step': 7302, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:55.942714', 'step': 7302, 'epoch': 1} {'type': 'loss', 'content': 0.14332984387874603, 'timestamp': '2025-09-30 22:20:55.948411', 'step': 7303, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:55.982602', 'step': 7303, 'epoch': 1} {'type': 'loss', 'content': 0.09384971112012863, 'timestamp': '2025-09-30 22:20:56.013044', 'step': 7304, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:56.049006', 'step': 7304, 'epoch': 1} {'type': 'loss', 'content': 0.13277599215507507, 'timestamp': '2025-09-30 22:20:56.055614', 'step': 7305, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:56.089015', 'step': 7305, 'epoch': 1} {'type': 'loss', 'content': 0.17398177087306976, 'timestamp': '2025-09-30 22:20:56.095803', 'step': 7306, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:56.131394', 'step': 7306, 'epoch': 1} {'type': 'loss', 'content': 0.12065888196229935, 'timestamp': '2025-09-30 22:20:56.138236', 'step': 7307, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:56.172396', 'step': 7307, 'epoch': 1} {'type': 'loss', 'content': 0.15654724836349487, 'timestamp': '2025-09-30 22:20:56.201611', 'step': 7308, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:56.238794', 'step': 7308, 'epoch': 1} {'type': 'loss', 'content': 0.1836325228214264, 'timestamp': '2025-09-30 22:20:56.245994', 'step': 7309, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:56.281853', 'step': 7309, 'epoch': 1} {'type': 'loss', 'content': 0.18490701913833618, 'timestamp': '2025-09-30 22:20:56.288711', 'step': 7310, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:56.323231', 'step': 7310, 'epoch': 1} {'type': 'loss', 'content': 0.15011189877986908, 'timestamp': '2025-09-30 22:20:56.332368', 'step': 7311, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:56.370889', 'step': 7311, 'epoch': 1} {'type': 'loss', 'content': 0.16612285375595093, 'timestamp': '2025-09-30 22:20:56.399856', 'step': 7312, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:56.435394', 'step': 7312, 'epoch': 1} {'type': 'loss', 'content': 0.15311512351036072, 'timestamp': '2025-09-30 22:20:56.443326', 'step': 7313, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:56.478015', 'step': 7313, 'epoch': 1} {'type': 'loss', 'content': 0.16660401225090027, 'timestamp': '2025-09-30 22:20:56.485338', 'step': 7314, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:56.521896', 'step': 7314, 'epoch': 1} {'type': 'loss', 'content': 0.1862507164478302, 'timestamp': '2025-09-30 22:20:56.529462', 'step': 7315, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:56.560496', 'step': 7315, 'epoch': 1} {'type': 'loss', 'content': 0.10843346267938614, 'timestamp': '2025-09-30 22:20:56.589923', 'step': 7316, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:56.629054', 'step': 7316, 'epoch': 1} {'type': 'loss', 'content': 0.20138861238956451, 'timestamp': '2025-09-30 22:20:56.637343', 'step': 7317, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:56.668759', 'step': 7317, 'epoch': 1} {'type': 'loss', 'content': 0.18175408244132996, 'timestamp': '2025-09-30 22:20:56.671631', 'step': 7318, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:56.709794', 'step': 7318, 'epoch': 1} {'type': 'loss', 'content': 0.1111707091331482, 'timestamp': '2025-09-30 22:20:56.721559', 'step': 7319, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:56.764144', 'step': 7319, 'epoch': 1} {'type': 'loss', 'content': 0.17443692684173584, 'timestamp': '2025-09-30 22:20:56.798754', 'step': 7320, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:56.831751', 'step': 7320, 'epoch': 1} {'type': 'loss', 'content': 0.25234314799308777, 'timestamp': '2025-09-30 22:20:56.835240', 'step': 7321, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:56.866637', 'step': 7321, 'epoch': 1} {'type': 'loss', 'content': 0.19382593035697937, 'timestamp': '2025-09-30 22:20:56.870372', 'step': 7322, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:56.910025', 'step': 7322, 'epoch': 1} {'type': 'loss', 'content': 0.14694543182849884, 'timestamp': '2025-09-30 22:20:56.914957', 'step': 7323, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:56.945663', 'step': 7323, 'epoch': 1} {'type': 'loss', 'content': 0.08859571069478989, 'timestamp': '2025-09-30 22:20:56.971045', 'step': 7324, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.002879', 'step': 7324, 'epoch': 1} {'type': 'loss', 'content': 0.0972103402018547, 'timestamp': '2025-09-30 22:20:57.005409', 'step': 7325, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:57.042299', 'step': 7325, 'epoch': 1} {'type': 'loss', 'content': 0.14704695343971252, 'timestamp': '2025-09-30 22:20:57.053728', 'step': 7326, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:57.089698', 'step': 7326, 'epoch': 1} {'type': 'loss', 'content': 0.112030029296875, 'timestamp': '2025-09-30 22:20:57.101351', 'step': 7327, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:57.138890', 'step': 7327, 'epoch': 1} {'type': 'loss', 'content': 0.16178728640079498, 'timestamp': '2025-09-30 22:20:57.170282', 'step': 7328, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:57.207325', 'step': 7328, 'epoch': 1} {'type': 'loss', 'content': 0.10380289703607559, 'timestamp': '2025-09-30 22:20:57.215441', 'step': 7329, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.252727', 'step': 7329, 'epoch': 1} {'type': 'loss', 'content': 0.198601633310318, 'timestamp': '2025-09-30 22:20:57.261727', 'step': 7330, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.293670', 'step': 7330, 'epoch': 1} {'type': 'loss', 'content': 0.11364025622606277, 'timestamp': '2025-09-30 22:20:57.307921', 'step': 7331, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:57.350439', 'step': 7331, 'epoch': 1} {'type': 'loss', 'content': 0.16712594032287598, 'timestamp': '2025-09-30 22:20:57.374889', 'step': 7332, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.405220', 'step': 7332, 'epoch': 1} {'type': 'loss', 'content': 0.15614530444145203, 'timestamp': '2025-09-30 22:20:57.407887', 'step': 7333, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:57.439327', 'step': 7333, 'epoch': 1} {'type': 'loss', 'content': 0.09364181011915207, 'timestamp': '2025-09-30 22:20:57.449687', 'step': 7334, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:57.488564', 'step': 7334, 'epoch': 1} {'type': 'loss', 'content': 0.09450306743383408, 'timestamp': '2025-09-30 22:20:57.498014', 'step': 7335, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:57.528565', 'step': 7335, 'epoch': 1} {'type': 'loss', 'content': 0.17294718325138092, 'timestamp': '2025-09-30 22:20:57.555661', 'step': 7336, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.594384', 'step': 7336, 'epoch': 1} {'type': 'loss', 'content': 0.2132890224456787, 'timestamp': '2025-09-30 22:20:57.606754', 'step': 7337, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.647388', 'step': 7337, 'epoch': 1} {'type': 'loss', 'content': 0.11286386102437973, 'timestamp': '2025-09-30 22:20:57.658327', 'step': 7338, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:20:57.696307', 'step': 7338, 'epoch': 1} {'type': 'loss', 'content': 0.1639161854982376, 'timestamp': '2025-09-30 22:20:57.705123', 'step': 7339, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.743603', 'step': 7339, 'epoch': 1} {'type': 'loss', 'content': 0.16403572261333466, 'timestamp': '2025-09-30 22:20:57.772821', 'step': 7340, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:57.808682', 'step': 7340, 'epoch': 1} {'type': 'loss', 'content': 0.18958216905593872, 'timestamp': '2025-09-30 22:20:57.811738', 'step': 7341, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.843051', 'step': 7341, 'epoch': 1} {'type': 'loss', 'content': 0.16654637455940247, 'timestamp': '2025-09-30 22:20:57.851706', 'step': 7342, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:57.888013', 'step': 7342, 'epoch': 1} {'type': 'loss', 'content': 0.11906698346138, 'timestamp': '2025-09-30 22:20:57.898390', 'step': 7343, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:57.933396', 'step': 7343, 'epoch': 1} {'type': 'loss', 'content': 0.257824182510376, 'timestamp': '2025-09-30 22:20:57.963783', 'step': 7344, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:58.000635', 'step': 7344, 'epoch': 1} {'type': 'loss', 'content': 0.0883556604385376, 'timestamp': '2025-09-30 22:20:58.010883', 'step': 7345, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:58.047567', 'step': 7345, 'epoch': 1} {'type': 'loss', 'content': 0.12301810085773468, 'timestamp': '2025-09-30 22:20:58.059630', 'step': 7346, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:58.098439', 'step': 7346, 'epoch': 1} {'type': 'loss', 'content': 0.12234807759523392, 'timestamp': '2025-09-30 22:20:58.117164', 'step': 7347, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:58.148735', 'step': 7347, 'epoch': 1} {'type': 'loss', 'content': 0.12427518516778946, 'timestamp': '2025-09-30 22:20:58.190038', 'step': 7348, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:58.238593', 'step': 7348, 'epoch': 1} {'type': 'loss', 'content': 0.06799253076314926, 'timestamp': '2025-09-30 22:20:58.258385', 'step': 7349, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:58.306498', 'step': 7349, 'epoch': 1} {'type': 'loss', 'content': 0.10367120057344437, 'timestamp': '2025-09-30 22:20:58.319885', 'step': 7350, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:58.353083', 'step': 7350, 'epoch': 1} {'type': 'loss', 'content': 0.1149938777089119, 'timestamp': '2025-09-30 22:20:58.370130', 'step': 7351, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:58.412687', 'step': 7351, 'epoch': 1} {'type': 'loss', 'content': 0.16668953001499176, 'timestamp': '2025-09-30 22:20:58.441361', 'step': 7352, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:58.481222', 'step': 7352, 'epoch': 1} {'type': 'loss', 'content': 0.15801464021205902, 'timestamp': '2025-09-30 22:20:58.487794', 'step': 7353, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:58.532084', 'step': 7353, 'epoch': 1} {'type': 'loss', 'content': 0.1962548792362213, 'timestamp': '2025-09-30 22:20:58.546902', 'step': 7354, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:58.586091', 'step': 7354, 'epoch': 1} {'type': 'loss', 'content': 0.2200576663017273, 'timestamp': '2025-09-30 22:20:58.597209', 'step': 7355, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:20:58.651466', 'step': 7355, 'epoch': 1} {'type': 'loss', 'content': 0.1018269807100296, 'timestamp': '2025-09-30 22:20:58.681067', 'step': 7356, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:58.725258', 'step': 7356, 'epoch': 1} {'type': 'loss', 'content': 0.2131575345993042, 'timestamp': '2025-09-30 22:20:58.741702', 'step': 7357, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:58.778025', 'step': 7357, 'epoch': 1} {'type': 'loss', 'content': 0.08667898923158646, 'timestamp': '2025-09-30 22:20:58.805378', 'step': 7358, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:58.850108', 'step': 7358, 'epoch': 1} {'type': 'loss', 'content': 0.1741117686033249, 'timestamp': '2025-09-30 22:20:58.880949', 'step': 7359, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:58.934047', 'step': 7359, 'epoch': 1} {'type': 'loss', 'content': 0.13591952621936798, 'timestamp': '2025-09-30 22:20:58.975680', 'step': 7360, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:59.015001', 'step': 7360, 'epoch': 1} {'type': 'loss', 'content': 0.18313653767108917, 'timestamp': '2025-09-30 22:20:59.026821', 'step': 7361, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:59.063243', 'step': 7361, 'epoch': 1} {'type': 'loss', 'content': 0.10794088989496231, 'timestamp': '2025-09-30 22:20:59.068943', 'step': 7362, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:59.113723', 'step': 7362, 'epoch': 1} {'type': 'loss', 'content': 0.07936406135559082, 'timestamp': '2025-09-30 22:20:59.123688', 'step': 7363, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:59.167398', 'step': 7363, 'epoch': 1} {'type': 'loss', 'content': 0.22505497932434082, 'timestamp': '2025-09-30 22:20:59.205944', 'step': 7364, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:59.248477', 'step': 7364, 'epoch': 1} {'type': 'loss', 'content': 0.2186536192893982, 'timestamp': '2025-09-30 22:20:59.253652', 'step': 7365, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:59.292069', 'step': 7365, 'epoch': 1} {'type': 'loss', 'content': 0.06917110085487366, 'timestamp': '2025-09-30 22:20:59.301038', 'step': 7366, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:59.336098', 'step': 7366, 'epoch': 1} {'type': 'loss', 'content': 0.1318136602640152, 'timestamp': '2025-09-30 22:20:59.344804', 'step': 7367, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:20:59.391724', 'step': 7367, 'epoch': 1} {'type': 'loss', 'content': 0.20621024072170258, 'timestamp': '2025-09-30 22:20:59.422789', 'step': 7368, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:59.463919', 'step': 7368, 'epoch': 1} {'type': 'loss', 'content': 0.16538450121879578, 'timestamp': '2025-09-30 22:20:59.474655', 'step': 7369, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:59.509506', 'step': 7369, 'epoch': 1} {'type': 'loss', 'content': 0.13406683504581451, 'timestamp': '2025-09-30 22:20:59.534138', 'step': 7370, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:20:59.571575', 'step': 7370, 'epoch': 1} {'type': 'loss', 'content': 0.11242951452732086, 'timestamp': '2025-09-30 22:20:59.584162', 'step': 7371, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:59.621557', 'step': 7371, 'epoch': 1} {'type': 'loss', 'content': 0.14234532415866852, 'timestamp': '2025-09-30 22:20:59.661040', 'step': 7372, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:59.701395', 'step': 7372, 'epoch': 1} {'type': 'loss', 'content': 0.17347049713134766, 'timestamp': '2025-09-30 22:20:59.714880', 'step': 7373, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:20:59.767491', 'step': 7373, 'epoch': 1} {'type': 'loss', 'content': 0.146390900015831, 'timestamp': '2025-09-30 22:20:59.789369', 'step': 7374, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:20:59.828030', 'step': 7374, 'epoch': 1} {'type': 'loss', 'content': 0.13380302488803864, 'timestamp': '2025-09-30 22:20:59.843038', 'step': 7375, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:59.887787', 'step': 7375, 'epoch': 1} {'type': 'loss', 'content': 0.2047548145055771, 'timestamp': '2025-09-30 22:20:59.918575', 'step': 7376, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:20:59.971319', 'step': 7376, 'epoch': 1} {'type': 'loss', 'content': 0.19904328882694244, 'timestamp': '2025-09-30 22:20:59.985793', 'step': 7377, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.027924', 'step': 7377, 'epoch': 1} {'type': 'loss', 'content': 0.13123512268066406, 'timestamp': '2025-09-30 22:21:00.042164', 'step': 7378, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.086418', 'step': 7378, 'epoch': 1} {'type': 'loss', 'content': 0.10355847328901291, 'timestamp': '2025-09-30 22:21:00.091011', 'step': 7379, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.122462', 'step': 7379, 'epoch': 1} {'type': 'loss', 'content': 0.14249910414218903, 'timestamp': '2025-09-30 22:21:00.158071', 'step': 7380, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.198220', 'step': 7380, 'epoch': 1} {'type': 'loss', 'content': 0.2080194056034088, 'timestamp': '2025-09-30 22:21:00.203908', 'step': 7381, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:00.236787', 'step': 7381, 'epoch': 1} {'type': 'loss', 'content': 0.18772532045841217, 'timestamp': '2025-09-30 22:21:00.239864', 'step': 7382, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.280867', 'step': 7382, 'epoch': 1} {'type': 'loss', 'content': 0.15548932552337646, 'timestamp': '2025-09-30 22:21:00.285388', 'step': 7383, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.317122', 'step': 7383, 'epoch': 1} {'type': 'loss', 'content': 0.13395391404628754, 'timestamp': '2025-09-30 22:21:00.342465', 'step': 7384, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.383104', 'step': 7384, 'epoch': 1} {'type': 'loss', 'content': 0.22439363598823547, 'timestamp': '2025-09-30 22:21:00.399058', 'step': 7385, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.442793', 'step': 7385, 'epoch': 1} {'type': 'loss', 'content': 0.09449198842048645, 'timestamp': '2025-09-30 22:21:00.455760', 'step': 7386, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.496763', 'step': 7386, 'epoch': 1} {'type': 'loss', 'content': 0.07296023517847061, 'timestamp': '2025-09-30 22:21:00.501108', 'step': 7387, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.550481', 'step': 7387, 'epoch': 1} {'type': 'loss', 'content': 0.11583147943019867, 'timestamp': '2025-09-30 22:21:00.588678', 'step': 7388, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.632100', 'step': 7388, 'epoch': 1} {'type': 'loss', 'content': 0.2068696767091751, 'timestamp': '2025-09-30 22:21:00.634923', 'step': 7389, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.674330', 'step': 7389, 'epoch': 1} {'type': 'loss', 'content': 0.15592306852340698, 'timestamp': '2025-09-30 22:21:00.691649', 'step': 7390, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.737070', 'step': 7390, 'epoch': 1} {'type': 'loss', 'content': 0.23790434002876282, 'timestamp': '2025-09-30 22:21:00.741474', 'step': 7391, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.783663', 'step': 7391, 'epoch': 1} {'type': 'loss', 'content': 0.13726794719696045, 'timestamp': '2025-09-30 22:21:00.817431', 'step': 7392, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:00.860406', 'step': 7392, 'epoch': 1} {'type': 'loss', 'content': 0.18506421148777008, 'timestamp': '2025-09-30 22:21:00.865954', 'step': 7393, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.910919', 'step': 7393, 'epoch': 1} {'type': 'loss', 'content': 0.13864141702651978, 'timestamp': '2025-09-30 22:21:00.914963', 'step': 7394, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:00.958805', 'step': 7394, 'epoch': 1} {'type': 'loss', 'content': 0.13608504831790924, 'timestamp': '2025-09-30 22:21:00.961727', 'step': 7395, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:00.992913', 'step': 7395, 'epoch': 1} {'type': 'loss', 'content': 0.10714311897754669, 'timestamp': '2025-09-30 22:21:01.029231', 'step': 7396, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:01.073100', 'step': 7396, 'epoch': 1} {'type': 'loss', 'content': 0.11690858006477356, 'timestamp': '2025-09-30 22:21:01.076254', 'step': 7397, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:01.108017', 'step': 7397, 'epoch': 1} {'type': 'loss', 'content': 0.10582853108644485, 'timestamp': '2025-09-30 22:21:01.121028', 'step': 7398, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.164025', 'step': 7398, 'epoch': 1} {'type': 'loss', 'content': 0.13534292578697205, 'timestamp': '2025-09-30 22:21:01.167438', 'step': 7399, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.210402', 'step': 7399, 'epoch': 1} {'type': 'loss', 'content': 0.12157239019870758, 'timestamp': '2025-09-30 22:21:01.234768', 'step': 7400, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:01.281163', 'step': 7400, 'epoch': 1} {'type': 'loss', 'content': 0.2296542525291443, 'timestamp': '2025-09-30 22:21:01.296258', 'step': 7401, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.337327', 'step': 7401, 'epoch': 1} {'type': 'loss', 'content': 0.1508462280035019, 'timestamp': '2025-09-30 22:21:01.351601', 'step': 7402, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:01.389070', 'step': 7402, 'epoch': 1} {'type': 'loss', 'content': 0.13648532330989838, 'timestamp': '2025-09-30 22:21:01.396348', 'step': 7403, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.448738', 'step': 7403, 'epoch': 1} {'type': 'loss', 'content': 0.18067587912082672, 'timestamp': '2025-09-30 22:21:01.486883', 'step': 7404, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:01.524548', 'step': 7404, 'epoch': 1} {'type': 'loss', 'content': 0.09857361763715744, 'timestamp': '2025-09-30 22:21:01.529055', 'step': 7405, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:01.563538', 'step': 7405, 'epoch': 1} {'type': 'loss', 'content': 0.14031323790550232, 'timestamp': '2025-09-30 22:21:01.568382', 'step': 7406, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.610689', 'step': 7406, 'epoch': 1} {'type': 'loss', 'content': 0.2018025666475296, 'timestamp': '2025-09-30 22:21:01.618763', 'step': 7407, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.652502', 'step': 7407, 'epoch': 1} {'type': 'loss', 'content': 0.30156755447387695, 'timestamp': '2025-09-30 22:21:01.682391', 'step': 7408, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:01.716499', 'step': 7408, 'epoch': 1} {'type': 'loss', 'content': 0.2270808219909668, 'timestamp': '2025-09-30 22:21:01.722229', 'step': 7409, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.771997', 'step': 7409, 'epoch': 1} {'type': 'loss', 'content': 0.11023242026567459, 'timestamp': '2025-09-30 22:21:01.785268', 'step': 7410, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:01.831721', 'step': 7410, 'epoch': 1} {'type': 'loss', 'content': 0.07459571212530136, 'timestamp': '2025-09-30 22:21:01.837593', 'step': 7411, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:01.876142', 'step': 7411, 'epoch': 1} {'type': 'loss', 'content': 0.11160463839769363, 'timestamp': '2025-09-30 22:21:01.911947', 'step': 7412, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:01.950567', 'step': 7412, 'epoch': 1} {'type': 'loss', 'content': 0.1768215149641037, 'timestamp': '2025-09-30 22:21:01.966495', 'step': 7413, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:02.001512', 'step': 7413, 'epoch': 1} {'type': 'loss', 'content': 0.057414259761571884, 'timestamp': '2025-09-30 22:21:02.006408', 'step': 7414, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.040918', 'step': 7414, 'epoch': 1} {'type': 'loss', 'content': 0.06582339853048325, 'timestamp': '2025-09-30 22:21:02.051406', 'step': 7415, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:02.089070', 'step': 7415, 'epoch': 1} {'type': 'loss', 'content': 0.10620111227035522, 'timestamp': '2025-09-30 22:21:02.135179', 'step': 7416, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:02.172511', 'step': 7416, 'epoch': 1} {'type': 'loss', 'content': 0.21596387028694153, 'timestamp': '2025-09-30 22:21:02.177115', 'step': 7417, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:02.214914', 'step': 7417, 'epoch': 1} {'type': 'loss', 'content': 0.06725442409515381, 'timestamp': '2025-09-30 22:21:02.229813', 'step': 7418, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.264141', 'step': 7418, 'epoch': 1} {'type': 'loss', 'content': 0.15384356677532196, 'timestamp': '2025-09-30 22:21:02.280586', 'step': 7419, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:02.317765', 'step': 7419, 'epoch': 1} {'type': 'loss', 'content': 0.13809876143932343, 'timestamp': '2025-09-30 22:21:02.357425', 'step': 7420, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:02.392932', 'step': 7420, 'epoch': 1} {'type': 'loss', 'content': 0.08457455039024353, 'timestamp': '2025-09-30 22:21:02.409815', 'step': 7421, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.458039', 'step': 7421, 'epoch': 1} {'type': 'loss', 'content': 0.17977264523506165, 'timestamp': '2025-09-30 22:21:02.466988', 'step': 7422, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.518904', 'step': 7422, 'epoch': 1} {'type': 'loss', 'content': 0.15851999819278717, 'timestamp': '2025-09-30 22:21:02.524089', 'step': 7423, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.570324', 'step': 7423, 'epoch': 1} {'type': 'loss', 'content': 0.09561730921268463, 'timestamp': '2025-09-30 22:21:02.618175', 'step': 7424, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.672043', 'step': 7424, 'epoch': 1} {'type': 'loss', 'content': 0.24090678989887238, 'timestamp': '2025-09-30 22:21:02.681279', 'step': 7425, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.723843', 'step': 7425, 'epoch': 1} {'type': 'loss', 'content': 0.10158952325582504, 'timestamp': '2025-09-30 22:21:02.731328', 'step': 7426, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:02.763800', 'step': 7426, 'epoch': 1} {'type': 'loss', 'content': 0.10417483001947403, 'timestamp': '2025-09-30 22:21:02.768895', 'step': 7427, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:02.818523', 'step': 7427, 'epoch': 1} {'type': 'loss', 'content': 0.07683242112398148, 'timestamp': '2025-09-30 22:21:02.861558', 'step': 7428, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:02.912180', 'step': 7428, 'epoch': 1} {'type': 'loss', 'content': 0.17279914021492004, 'timestamp': '2025-09-30 22:21:02.932658', 'step': 7429, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:02.982716', 'step': 7429, 'epoch': 1} {'type': 'loss', 'content': 0.10735224932432175, 'timestamp': '2025-09-30 22:21:02.987267', 'step': 7430, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:03.019629', 'step': 7430, 'epoch': 1} {'type': 'loss', 'content': 0.18220384418964386, 'timestamp': '2025-09-30 22:21:03.036492', 'step': 7431, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:03.070805', 'step': 7431, 'epoch': 1} {'type': 'loss', 'content': 0.17085999250411987, 'timestamp': '2025-09-30 22:21:03.097643', 'step': 7432, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:03.130659', 'step': 7432, 'epoch': 1} {'type': 'loss', 'content': 0.13865214586257935, 'timestamp': '2025-09-30 22:21:03.143708', 'step': 7433, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:03.183178', 'step': 7433, 'epoch': 1} {'type': 'loss', 'content': 0.16671065986156464, 'timestamp': '2025-09-30 22:21:03.187442', 'step': 7434, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.219761', 'step': 7434, 'epoch': 1} {'type': 'loss', 'content': 0.14238350093364716, 'timestamp': '2025-09-30 22:21:03.236320', 'step': 7435, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.284990', 'step': 7435, 'epoch': 1} {'type': 'loss', 'content': 0.13811497390270233, 'timestamp': '2025-09-30 22:21:03.321071', 'step': 7436, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.354406', 'step': 7436, 'epoch': 1} {'type': 'loss', 'content': 0.16977909207344055, 'timestamp': '2025-09-30 22:21:03.362101', 'step': 7437, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:03.403166', 'step': 7437, 'epoch': 1} {'type': 'loss', 'content': 0.07604994624853134, 'timestamp': '2025-09-30 22:21:03.423411', 'step': 7438, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:03.470511', 'step': 7438, 'epoch': 1} {'type': 'loss', 'content': 0.11950640380382538, 'timestamp': '2025-09-30 22:21:03.477049', 'step': 7439, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:03.521777', 'step': 7439, 'epoch': 1} {'type': 'loss', 'content': 0.21188418567180634, 'timestamp': '2025-09-30 22:21:03.559290', 'step': 7440, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:03.594618', 'step': 7440, 'epoch': 1} {'type': 'loss', 'content': 0.15215592086315155, 'timestamp': '2025-09-30 22:21:03.600138', 'step': 7441, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:03.631673', 'step': 7441, 'epoch': 1} {'type': 'loss', 'content': 0.1315712183713913, 'timestamp': '2025-09-30 22:21:03.637023', 'step': 7442, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:21:03.671637', 'step': 7442, 'epoch': 1} {'type': 'loss', 'content': 0.2603244483470917, 'timestamp': '2025-09-30 22:21:03.689380', 'step': 7443, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:03.736533', 'step': 7443, 'epoch': 1} {'type': 'loss', 'content': 0.06296196579933167, 'timestamp': '2025-09-30 22:21:03.779723', 'step': 7444, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.812208', 'step': 7444, 'epoch': 1} {'type': 'loss', 'content': 0.1322169303894043, 'timestamp': '2025-09-30 22:21:03.833568', 'step': 7445, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.866457', 'step': 7445, 'epoch': 1} {'type': 'loss', 'content': 0.12126009911298752, 'timestamp': '2025-09-30 22:21:03.870789', 'step': 7446, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.913893', 'step': 7446, 'epoch': 1} {'type': 'loss', 'content': 0.24283109605312347, 'timestamp': '2025-09-30 22:21:03.919928', 'step': 7447, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:03.958201', 'step': 7447, 'epoch': 1} {'type': 'loss', 'content': 0.149063378572464, 'timestamp': '2025-09-30 22:21:03.998073', 'step': 7448, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:04.042336', 'step': 7448, 'epoch': 1} {'type': 'loss', 'content': 0.11206915974617004, 'timestamp': '2025-09-30 22:21:04.046390', 'step': 7449, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:04.083004', 'step': 7449, 'epoch': 1} {'type': 'loss', 'content': 0.14218053221702576, 'timestamp': '2025-09-30 22:21:04.086613', 'step': 7450, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:04.122374', 'step': 7450, 'epoch': 1} {'type': 'loss', 'content': 0.15401525795459747, 'timestamp': '2025-09-30 22:21:04.127679', 'step': 7451, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:04.160300', 'step': 7451, 'epoch': 1} {'type': 'loss', 'content': 0.10237876325845718, 'timestamp': '2025-09-30 22:21:04.186575', 'step': 7452, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:04.219667', 'step': 7452, 'epoch': 1} {'type': 'loss', 'content': 0.09458744525909424, 'timestamp': '2025-09-30 22:21:04.223632', 'step': 7453, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:04.263901', 'step': 7453, 'epoch': 1} {'type': 'loss', 'content': 0.0595889575779438, 'timestamp': '2025-09-30 22:21:04.286303', 'step': 7454, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:04.321790', 'step': 7454, 'epoch': 1} {'type': 'loss', 'content': 0.3628726601600647, 'timestamp': '2025-09-30 22:21:04.326301', 'step': 7455, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:04.368810', 'step': 7455, 'epoch': 1} {'type': 'loss', 'content': 0.14818701148033142, 'timestamp': '2025-09-30 22:21:04.403288', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:21:13.260876', 'step': 7456, 'epoch': 1} {'type': 'pplx', 'content': 9093.975498307002, 'timestamp': '2025-09-30 22:21:13.283535', 'step': 7456, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:13.328037', 'step': 7456, 'epoch': 1} {'type': 'loss', 'content': 0.15137283504009247, 'timestamp': '2025-09-30 22:21:13.346579', 'step': 7457, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:13.398779', 'step': 7457, 'epoch': 1} {'type': 'loss', 'content': 0.13771063089370728, 'timestamp': '2025-09-30 22:21:13.417474', 'step': 7458, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:13.463544', 'step': 7458, 'epoch': 1} {'type': 'loss', 'content': 0.08276071399450302, 'timestamp': '2025-09-30 22:21:13.481089', 'step': 7459, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-09-30 22:21:13.558368', 'step': 7459, 'epoch': 1} {'type': 'loss', 'content': 0.3930889368057251, 'timestamp': '2025-09-30 22:21:13.596429', 'step': 7460, 'epoch': 1} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:13.644075', 'step': 7460, 'epoch': 2} {'type': 'loss', 'content': 0.08474373072385788, 'timestamp': '2025-09-30 22:21:13.659631', 'step': 7461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:13.701294', 'step': 7461, 'epoch': 2} {'type': 'loss', 'content': 0.10203493386507034, 'timestamp': '2025-09-30 22:21:13.715776', 'step': 7462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:13.762306', 'step': 7462, 'epoch': 2} {'type': 'loss', 'content': 0.12935811281204224, 'timestamp': '2025-09-30 22:21:13.776715', 'step': 7463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:13.816647', 'step': 7463, 'epoch': 2} {'type': 'loss', 'content': 0.127817302942276, 'timestamp': '2025-09-30 22:21:13.851550', 'step': 7464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:13.891677', 'step': 7464, 'epoch': 2} {'type': 'loss', 'content': 0.2181871384382248, 'timestamp': '2025-09-30 22:21:13.903875', 'step': 7465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:13.945050', 'step': 7465, 'epoch': 2} {'type': 'loss', 'content': 0.07980192452669144, 'timestamp': '2025-09-30 22:21:13.956345', 'step': 7466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:13.996891', 'step': 7466, 'epoch': 2} {'type': 'loss', 'content': 0.17366304993629456, 'timestamp': '2025-09-30 22:21:14.001673', 'step': 7467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:14.034171', 'step': 7467, 'epoch': 2} {'type': 'loss', 'content': 0.07825703173875809, 'timestamp': '2025-09-30 22:21:14.060018', 'step': 7468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:14.104037', 'step': 7468, 'epoch': 2} {'type': 'loss', 'content': 0.09777402132749557, 'timestamp': '2025-09-30 22:21:14.113060', 'step': 7469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.146503', 'step': 7469, 'epoch': 2} {'type': 'loss', 'content': 0.1370391845703125, 'timestamp': '2025-09-30 22:21:14.164040', 'step': 7470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.206882', 'step': 7470, 'epoch': 2} {'type': 'loss', 'content': 0.07646885514259338, 'timestamp': '2025-09-30 22:21:14.222619', 'step': 7471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:14.267172', 'step': 7471, 'epoch': 2} {'type': 'loss', 'content': 0.15107925236225128, 'timestamp': '2025-09-30 22:21:14.294556', 'step': 7472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:14.341311', 'step': 7472, 'epoch': 2} {'type': 'loss', 'content': 0.09902480989694595, 'timestamp': '2025-09-30 22:21:14.345562', 'step': 7473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:14.388084', 'step': 7473, 'epoch': 2} {'type': 'loss', 'content': 0.08924809843301773, 'timestamp': '2025-09-30 22:21:14.393368', 'step': 7474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:14.435820', 'step': 7474, 'epoch': 2} {'type': 'loss', 'content': 0.11348185688257217, 'timestamp': '2025-09-30 22:21:14.449632', 'step': 7475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:14.492488', 'step': 7475, 'epoch': 2} {'type': 'loss', 'content': 0.16895364224910736, 'timestamp': '2025-09-30 22:21:14.528211', 'step': 7476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.571214', 'step': 7476, 'epoch': 2} {'type': 'loss', 'content': 0.05296709015965462, 'timestamp': '2025-09-30 22:21:14.586877', 'step': 7477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.630336', 'step': 7477, 'epoch': 2} {'type': 'loss', 'content': 0.1456029713153839, 'timestamp': '2025-09-30 22:21:14.647464', 'step': 7478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:14.692031', 'step': 7478, 'epoch': 2} {'type': 'loss', 'content': 0.21284902095794678, 'timestamp': '2025-09-30 22:21:14.707701', 'step': 7479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.755159', 'step': 7479, 'epoch': 2} {'type': 'loss', 'content': 0.05756638944149017, 'timestamp': '2025-09-30 22:21:14.790123', 'step': 7480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.824531', 'step': 7480, 'epoch': 2} {'type': 'loss', 'content': 0.07182108610868454, 'timestamp': '2025-09-30 22:21:14.835984', 'step': 7481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:14.874548', 'step': 7481, 'epoch': 2} {'type': 'loss', 'content': 0.13461333513259888, 'timestamp': '2025-09-30 22:21:14.884375', 'step': 7482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:14.927156', 'step': 7482, 'epoch': 2} {'type': 'loss', 'content': 0.0676090344786644, 'timestamp': '2025-09-30 22:21:14.942201', 'step': 7483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:14.985326', 'step': 7483, 'epoch': 2} {'type': 'loss', 'content': 0.1432725042104721, 'timestamp': '2025-09-30 22:21:15.011148', 'step': 7484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.050558', 'step': 7484, 'epoch': 2} {'type': 'loss', 'content': 0.05732604116201401, 'timestamp': '2025-09-30 22:21:15.061144', 'step': 7485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:15.099656', 'step': 7485, 'epoch': 2} {'type': 'loss', 'content': 0.13764424622058868, 'timestamp': '2025-09-30 22:21:15.111556', 'step': 7486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.150902', 'step': 7486, 'epoch': 2} {'type': 'loss', 'content': 0.19573108851909637, 'timestamp': '2025-09-30 22:21:15.164175', 'step': 7487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:15.204546', 'step': 7487, 'epoch': 2} {'type': 'loss', 'content': 0.10266929864883423, 'timestamp': '2025-09-30 22:21:15.238773', 'step': 7488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:15.270497', 'step': 7488, 'epoch': 2} {'type': 'loss', 'content': 0.0987316146492958, 'timestamp': '2025-09-30 22:21:15.285073', 'step': 7489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:15.328686', 'step': 7489, 'epoch': 2} {'type': 'loss', 'content': 0.07433472573757172, 'timestamp': '2025-09-30 22:21:15.341230', 'step': 7490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.381314', 'step': 7490, 'epoch': 2} {'type': 'loss', 'content': 0.1299804151058197, 'timestamp': '2025-09-30 22:21:15.394828', 'step': 7491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:15.433914', 'step': 7491, 'epoch': 2} {'type': 'loss', 'content': 0.08002497255802155, 'timestamp': '2025-09-30 22:21:15.467401', 'step': 7492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.507604', 'step': 7492, 'epoch': 2} {'type': 'loss', 'content': 0.1913783848285675, 'timestamp': '2025-09-30 22:21:15.518891', 'step': 7493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.559395', 'step': 7493, 'epoch': 2} {'type': 'loss', 'content': 0.1382981836795807, 'timestamp': '2025-09-30 22:21:15.571172', 'step': 7494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:15.612830', 'step': 7494, 'epoch': 2} {'type': 'loss', 'content': 0.17630501091480255, 'timestamp': '2025-09-30 22:21:15.624978', 'step': 7495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:15.666588', 'step': 7495, 'epoch': 2} {'type': 'loss', 'content': 0.05772862955927849, 'timestamp': '2025-09-30 22:21:15.699337', 'step': 7496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.737983', 'step': 7496, 'epoch': 2} {'type': 'loss', 'content': 0.1208806037902832, 'timestamp': '2025-09-30 22:21:15.749449', 'step': 7497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:15.793079', 'step': 7497, 'epoch': 2} {'type': 'loss', 'content': 0.08121898770332336, 'timestamp': '2025-09-30 22:21:15.805336', 'step': 7498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:15.837095', 'step': 7498, 'epoch': 2} {'type': 'loss', 'content': 0.1773262470960617, 'timestamp': '2025-09-30 22:21:15.841418', 'step': 7499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:15.880841', 'step': 7499, 'epoch': 2} {'type': 'loss', 'content': 0.14954209327697754, 'timestamp': '2025-09-30 22:21:15.915079', 'step': 7500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 7500', 'timestamp': '2025-09-30 22:21:21.107463', 'step': 7500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:21.156983', 'step': 7500, 'epoch': 2} {'type': 'loss', 'content': 0.09148172289133072, 'timestamp': '2025-09-30 22:21:21.178905', 'step': 7501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.227263', 'step': 7501, 'epoch': 2} {'type': 'loss', 'content': 0.13526125252246857, 'timestamp': '2025-09-30 22:21:21.245107', 'step': 7502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.289627', 'step': 7502, 'epoch': 2} {'type': 'loss', 'content': 0.13135913014411926, 'timestamp': '2025-09-30 22:21:21.298474', 'step': 7503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.333576', 'step': 7503, 'epoch': 2} {'type': 'loss', 'content': 0.1658037006855011, 'timestamp': '2025-09-30 22:21:21.359945', 'step': 7504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.400620', 'step': 7504, 'epoch': 2} {'type': 'loss', 'content': 0.18675380945205688, 'timestamp': '2025-09-30 22:21:21.411442', 'step': 7505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:21.454500', 'step': 7505, 'epoch': 2} {'type': 'loss', 'content': 0.12716417014598846, 'timestamp': '2025-09-30 22:21:21.462228', 'step': 7506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:21.497830', 'step': 7506, 'epoch': 2} {'type': 'loss', 'content': 0.14554709196090698, 'timestamp': '2025-09-30 22:21:21.503883', 'step': 7507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.539885', 'step': 7507, 'epoch': 2} {'type': 'loss', 'content': 0.19209043681621552, 'timestamp': '2025-09-30 22:21:21.567302', 'step': 7508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:21.602463', 'step': 7508, 'epoch': 2} {'type': 'loss', 'content': 0.16982409358024597, 'timestamp': '2025-09-30 22:21:21.605948', 'step': 7509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.637843', 'step': 7509, 'epoch': 2} {'type': 'loss', 'content': 0.10382430255413055, 'timestamp': '2025-09-30 22:21:21.641608', 'step': 7510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:21.673831', 'step': 7510, 'epoch': 2} {'type': 'loss', 'content': 0.14845870435237885, 'timestamp': '2025-09-30 22:21:21.679404', 'step': 7511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.714452', 'step': 7511, 'epoch': 2} {'type': 'loss', 'content': 0.20788131654262543, 'timestamp': '2025-09-30 22:21:21.739202', 'step': 7512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:21.770151', 'step': 7512, 'epoch': 2} {'type': 'loss', 'content': 0.10914800316095352, 'timestamp': '2025-09-30 22:21:21.773167', 'step': 7513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:21.812941', 'step': 7513, 'epoch': 2} {'type': 'loss', 'content': 0.09579972922801971, 'timestamp': '2025-09-30 22:21:21.815610', 'step': 7514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:21.847190', 'step': 7514, 'epoch': 2} {'type': 'loss', 'content': 0.1408448964357376, 'timestamp': '2025-09-30 22:21:21.853931', 'step': 7515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:21.888046', 'step': 7515, 'epoch': 2} {'type': 'loss', 'content': 0.09534936398267746, 'timestamp': '2025-09-30 22:21:21.919860', 'step': 7516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:21.956001', 'step': 7516, 'epoch': 2} {'type': 'loss', 'content': 0.11068664491176605, 'timestamp': '2025-09-30 22:21:21.961954', 'step': 7517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:21.995052', 'step': 7517, 'epoch': 2} {'type': 'loss', 'content': 0.16421128809452057, 'timestamp': '2025-09-30 22:21:21.999204', 'step': 7518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:22.032559', 'step': 7518, 'epoch': 2} {'type': 'loss', 'content': 0.1615094393491745, 'timestamp': '2025-09-30 22:21:22.036040', 'step': 7519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:22.068522', 'step': 7519, 'epoch': 2} {'type': 'loss', 'content': 0.16225126385688782, 'timestamp': '2025-09-30 22:21:22.093649', 'step': 7520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:22.128397', 'step': 7520, 'epoch': 2} {'type': 'loss', 'content': 0.14831499755382538, 'timestamp': '2025-09-30 22:21:22.132938', 'step': 7521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:22.165455', 'step': 7521, 'epoch': 2} {'type': 'loss', 'content': 0.09625984728336334, 'timestamp': '2025-09-30 22:21:22.170230', 'step': 7522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:22.203102', 'step': 7522, 'epoch': 2} {'type': 'loss', 'content': 0.2105506956577301, 'timestamp': '2025-09-30 22:21:22.207499', 'step': 7523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.240261', 'step': 7523, 'epoch': 2} {'type': 'loss', 'content': 0.10753947496414185, 'timestamp': '2025-09-30 22:21:22.266108', 'step': 7524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:22.297589', 'step': 7524, 'epoch': 2} {'type': 'loss', 'content': 0.07145775109529495, 'timestamp': '2025-09-30 22:21:22.300609', 'step': 7525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:22.332600', 'step': 7525, 'epoch': 2} {'type': 'loss', 'content': 0.1651902049779892, 'timestamp': '2025-09-30 22:21:22.335598', 'step': 7526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:22.367256', 'step': 7526, 'epoch': 2} {'type': 'loss', 'content': 0.056222133338451385, 'timestamp': '2025-09-30 22:21:22.376839', 'step': 7527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.415394', 'step': 7527, 'epoch': 2} {'type': 'loss', 'content': 0.14204494655132294, 'timestamp': '2025-09-30 22:21:22.440675', 'step': 7528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:22.481895', 'step': 7528, 'epoch': 2} {'type': 'loss', 'content': 0.1915704756975174, 'timestamp': '2025-09-30 22:21:22.496731', 'step': 7529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.538643', 'step': 7529, 'epoch': 2} {'type': 'loss', 'content': 0.18073832988739014, 'timestamp': '2025-09-30 22:21:22.548859', 'step': 7530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.588258', 'step': 7530, 'epoch': 2} {'type': 'loss', 'content': 0.1132291778922081, 'timestamp': '2025-09-30 22:21:22.598455', 'step': 7531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.634635', 'step': 7531, 'epoch': 2} {'type': 'loss', 'content': 0.12302350252866745, 'timestamp': '2025-09-30 22:21:22.667802', 'step': 7532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:22.713575', 'step': 7532, 'epoch': 2} {'type': 'loss', 'content': 0.17063336074352264, 'timestamp': '2025-09-30 22:21:22.726282', 'step': 7533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.764092', 'step': 7533, 'epoch': 2} {'type': 'loss', 'content': 0.07623850554227829, 'timestamp': '2025-09-30 22:21:22.773946', 'step': 7534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:22.816766', 'step': 7534, 'epoch': 2} {'type': 'loss', 'content': 0.13058961927890778, 'timestamp': '2025-09-30 22:21:22.829617', 'step': 7535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:22.869139', 'step': 7535, 'epoch': 2} {'type': 'loss', 'content': 0.2348162978887558, 'timestamp': '2025-09-30 22:21:22.902622', 'step': 7536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:22.944437', 'step': 7536, 'epoch': 2} {'type': 'loss', 'content': 0.09004774689674377, 'timestamp': '2025-09-30 22:21:22.956898', 'step': 7537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:22.999779', 'step': 7537, 'epoch': 2} {'type': 'loss', 'content': 0.10620792955160141, 'timestamp': '2025-09-30 22:21:23.004919', 'step': 7538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:23.036743', 'step': 7538, 'epoch': 2} {'type': 'loss', 'content': 0.12559311091899872, 'timestamp': '2025-09-30 22:21:23.049542', 'step': 7539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:23.090376', 'step': 7539, 'epoch': 2} {'type': 'loss', 'content': 0.12905335426330566, 'timestamp': '2025-09-30 22:21:23.126877', 'step': 7540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:23.167713', 'step': 7540, 'epoch': 2} {'type': 'loss', 'content': 0.13931921124458313, 'timestamp': '2025-09-30 22:21:23.173598', 'step': 7541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:23.213861', 'step': 7541, 'epoch': 2} {'type': 'loss', 'content': 0.1529562920331955, 'timestamp': '2025-09-30 22:21:23.227028', 'step': 7542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:23.267821', 'step': 7542, 'epoch': 2} {'type': 'loss', 'content': 0.12577100098133087, 'timestamp': '2025-09-30 22:21:23.279351', 'step': 7543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.317184', 'step': 7543, 'epoch': 2} {'type': 'loss', 'content': 0.11745379120111465, 'timestamp': '2025-09-30 22:21:23.356169', 'step': 7544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:23.395337', 'step': 7544, 'epoch': 2} {'type': 'loss', 'content': 0.08844387531280518, 'timestamp': '2025-09-30 22:21:23.404319', 'step': 7545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:23.442327', 'step': 7545, 'epoch': 2} {'type': 'loss', 'content': 0.10709463804960251, 'timestamp': '2025-09-30 22:21:23.451675', 'step': 7546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.488442', 'step': 7546, 'epoch': 2} {'type': 'loss', 'content': 0.14288398623466492, 'timestamp': '2025-09-30 22:21:23.502471', 'step': 7547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:23.544370', 'step': 7547, 'epoch': 2} {'type': 'loss', 'content': 0.22944660484790802, 'timestamp': '2025-09-30 22:21:23.574429', 'step': 7548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.617805', 'step': 7548, 'epoch': 2} {'type': 'loss', 'content': 0.0828641876578331, 'timestamp': '2025-09-30 22:21:23.625719', 'step': 7549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.663380', 'step': 7549, 'epoch': 2} {'type': 'loss', 'content': 0.21182803809642792, 'timestamp': '2025-09-30 22:21:23.671858', 'step': 7550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.708069', 'step': 7550, 'epoch': 2} {'type': 'loss', 'content': 0.08151224255561829, 'timestamp': '2025-09-30 22:21:23.717676', 'step': 7551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.756090', 'step': 7551, 'epoch': 2} {'type': 'loss', 'content': 0.10525200515985489, 'timestamp': '2025-09-30 22:21:23.785831', 'step': 7552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.821068', 'step': 7552, 'epoch': 2} {'type': 'loss', 'content': 0.14887148141860962, 'timestamp': '2025-09-30 22:21:23.830022', 'step': 7553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:23.867150', 'step': 7553, 'epoch': 2} {'type': 'loss', 'content': 0.09895980358123779, 'timestamp': '2025-09-30 22:21:23.877868', 'step': 7554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:23.918579', 'step': 7554, 'epoch': 2} {'type': 'loss', 'content': 0.0874364823102951, 'timestamp': '2025-09-30 22:21:23.927161', 'step': 7555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:23.963076', 'step': 7555, 'epoch': 2} {'type': 'loss', 'content': 0.1800166517496109, 'timestamp': '2025-09-30 22:21:23.995039', 'step': 7556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.033990', 'step': 7556, 'epoch': 2} {'type': 'loss', 'content': 0.12577004730701447, 'timestamp': '2025-09-30 22:21:24.045453', 'step': 7557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.082600', 'step': 7557, 'epoch': 2} {'type': 'loss', 'content': 0.1423235386610031, 'timestamp': '2025-09-30 22:21:24.098307', 'step': 7558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:24.140595', 'step': 7558, 'epoch': 2} {'type': 'loss', 'content': 0.11460945755243301, 'timestamp': '2025-09-30 22:21:24.153290', 'step': 7559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.194576', 'step': 7559, 'epoch': 2} {'type': 'loss', 'content': 0.14370106160640717, 'timestamp': '2025-09-30 22:21:24.229087', 'step': 7560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:24.270264', 'step': 7560, 'epoch': 2} {'type': 'loss', 'content': 0.18119817972183228, 'timestamp': '2025-09-30 22:21:24.283873', 'step': 7561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:24.327685', 'step': 7561, 'epoch': 2} {'type': 'loss', 'content': 0.10066475719213486, 'timestamp': '2025-09-30 22:21:24.336469', 'step': 7562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:24.372979', 'step': 7562, 'epoch': 2} {'type': 'loss', 'content': 0.19656331837177277, 'timestamp': '2025-09-30 22:21:24.380302', 'step': 7563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.417626', 'step': 7563, 'epoch': 2} {'type': 'loss', 'content': 0.18292923271656036, 'timestamp': '2025-09-30 22:21:24.448121', 'step': 7564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.484856', 'step': 7564, 'epoch': 2} {'type': 'loss', 'content': 0.1555517613887787, 'timestamp': '2025-09-30 22:21:24.492852', 'step': 7565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.531383', 'step': 7565, 'epoch': 2} {'type': 'loss', 'content': 0.11229398846626282, 'timestamp': '2025-09-30 22:21:24.540423', 'step': 7566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:24.577375', 'step': 7566, 'epoch': 2} {'type': 'loss', 'content': 0.14862143993377686, 'timestamp': '2025-09-30 22:21:24.586817', 'step': 7567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:24.621631', 'step': 7567, 'epoch': 2} {'type': 'loss', 'content': 0.1350914090871811, 'timestamp': '2025-09-30 22:21:24.650919', 'step': 7568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:24.688645', 'step': 7568, 'epoch': 2} {'type': 'loss', 'content': 0.10932652652263641, 'timestamp': '2025-09-30 22:21:24.691839', 'step': 7569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.723979', 'step': 7569, 'epoch': 2} {'type': 'loss', 'content': 0.20766307413578033, 'timestamp': '2025-09-30 22:21:24.727005', 'step': 7570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.764099', 'step': 7570, 'epoch': 2} {'type': 'loss', 'content': 0.10429175943136215, 'timestamp': '2025-09-30 22:21:24.772257', 'step': 7571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:24.819921', 'step': 7571, 'epoch': 2} {'type': 'loss', 'content': 0.08906412869691849, 'timestamp': '2025-09-30 22:21:24.858653', 'step': 7572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.904846', 'step': 7572, 'epoch': 2} {'type': 'loss', 'content': 0.13598211109638214, 'timestamp': '2025-09-30 22:21:24.921268', 'step': 7573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:24.967939', 'step': 7573, 'epoch': 2} {'type': 'loss', 'content': 0.23187479376792908, 'timestamp': '2025-09-30 22:21:24.983001', 'step': 7574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:25.027034', 'step': 7574, 'epoch': 2} {'type': 'loss', 'content': 0.09667672216892242, 'timestamp': '2025-09-30 22:21:25.046257', 'step': 7575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:25.084656', 'step': 7575, 'epoch': 2} {'type': 'loss', 'content': 0.21715541183948517, 'timestamp': '2025-09-30 22:21:25.123541', 'step': 7576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:25.168472', 'step': 7576, 'epoch': 2} {'type': 'loss', 'content': 0.1427820920944214, 'timestamp': '2025-09-30 22:21:25.186626', 'step': 7577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:25.232489', 'step': 7577, 'epoch': 2} {'type': 'loss', 'content': 0.1931297928094864, 'timestamp': '2025-09-30 22:21:25.240309', 'step': 7578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:25.271835', 'step': 7578, 'epoch': 2} {'type': 'loss', 'content': 0.07291028648614883, 'timestamp': '2025-09-30 22:21:25.279965', 'step': 7579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:25.314720', 'step': 7579, 'epoch': 2} {'type': 'loss', 'content': 0.22592660784721375, 'timestamp': '2025-09-30 22:21:25.345298', 'step': 7580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:25.381671', 'step': 7580, 'epoch': 2} {'type': 'loss', 'content': 0.15766887366771698, 'timestamp': '2025-09-30 22:21:25.389431', 'step': 7581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:25.424472', 'step': 7581, 'epoch': 2} {'type': 'loss', 'content': 0.13784727454185486, 'timestamp': '2025-09-30 22:21:25.433460', 'step': 7582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:25.469350', 'step': 7582, 'epoch': 2} {'type': 'loss', 'content': 0.2183414101600647, 'timestamp': '2025-09-30 22:21:25.477274', 'step': 7583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:25.513187', 'step': 7583, 'epoch': 2} {'type': 'loss', 'content': 0.11214812099933624, 'timestamp': '2025-09-30 22:21:25.542315', 'step': 7584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:25.576617', 'step': 7584, 'epoch': 2} {'type': 'loss', 'content': 0.07329708337783813, 'timestamp': '2025-09-30 22:21:25.584028', 'step': 7585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:25.617358', 'step': 7585, 'epoch': 2} {'type': 'loss', 'content': 0.08963248133659363, 'timestamp': '2025-09-30 22:21:25.630523', 'step': 7586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:25.668102', 'step': 7586, 'epoch': 2} {'type': 'loss', 'content': 0.15191106498241425, 'timestamp': '2025-09-30 22:21:25.679999', 'step': 7587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:25.716688', 'step': 7587, 'epoch': 2} {'type': 'loss', 'content': 0.31218472123146057, 'timestamp': '2025-09-30 22:21:25.750068', 'step': 7588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:25.789160', 'step': 7588, 'epoch': 2} {'type': 'loss', 'content': 0.18592089414596558, 'timestamp': '2025-09-30 22:21:25.800207', 'step': 7589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:25.837304', 'step': 7589, 'epoch': 2} {'type': 'loss', 'content': 0.12833431363105774, 'timestamp': '2025-09-30 22:21:25.845125', 'step': 7590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:25.880463', 'step': 7590, 'epoch': 2} {'type': 'loss', 'content': 0.10358075052499771, 'timestamp': '2025-09-30 22:21:25.886398', 'step': 7591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:25.922463', 'step': 7591, 'epoch': 2} {'type': 'loss', 'content': 0.14795002341270447, 'timestamp': '2025-09-30 22:21:25.952196', 'step': 7592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:25.992273', 'step': 7592, 'epoch': 2} {'type': 'loss', 'content': 0.18793891370296478, 'timestamp': '2025-09-30 22:21:25.997657', 'step': 7593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.032810', 'step': 7593, 'epoch': 2} {'type': 'loss', 'content': 0.1958930343389511, 'timestamp': '2025-09-30 22:21:26.038930', 'step': 7594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.075396', 'step': 7594, 'epoch': 2} {'type': 'loss', 'content': 0.13336482644081116, 'timestamp': '2025-09-30 22:21:26.083757', 'step': 7595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:26.121380', 'step': 7595, 'epoch': 2} {'type': 'loss', 'content': 0.11900562047958374, 'timestamp': '2025-09-30 22:21:26.151791', 'step': 7596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:26.187399', 'step': 7596, 'epoch': 2} {'type': 'loss', 'content': 0.14378012716770172, 'timestamp': '2025-09-30 22:21:26.198734', 'step': 7597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.236804', 'step': 7597, 'epoch': 2} {'type': 'loss', 'content': 0.1368108093738556, 'timestamp': '2025-09-30 22:21:26.246973', 'step': 7598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.288676', 'step': 7598, 'epoch': 2} {'type': 'loss', 'content': 0.12455275654792786, 'timestamp': '2025-09-30 22:21:26.301812', 'step': 7599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:26.341048', 'step': 7599, 'epoch': 2} {'type': 'loss', 'content': 0.18424083292484283, 'timestamp': '2025-09-30 22:21:26.382654', 'step': 7600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.428245', 'step': 7600, 'epoch': 2} {'type': 'loss', 'content': 0.17487052083015442, 'timestamp': '2025-09-30 22:21:26.433362', 'step': 7601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:26.493103', 'step': 7601, 'epoch': 2} {'type': 'loss', 'content': 0.14442862570285797, 'timestamp': '2025-09-30 22:21:26.510783', 'step': 7602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.555996', 'step': 7602, 'epoch': 2} {'type': 'loss', 'content': 0.19178056716918945, 'timestamp': '2025-09-30 22:21:26.573354', 'step': 7603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:26.607544', 'step': 7603, 'epoch': 2} {'type': 'loss', 'content': 0.1541188657283783, 'timestamp': '2025-09-30 22:21:26.649038', 'step': 7604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.693989', 'step': 7604, 'epoch': 2} {'type': 'loss', 'content': 0.1513262391090393, 'timestamp': '2025-09-30 22:21:26.711378', 'step': 7605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:26.755799', 'step': 7605, 'epoch': 2} {'type': 'loss', 'content': 0.20703139901161194, 'timestamp': '2025-09-30 22:21:26.773754', 'step': 7606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:26.813585', 'step': 7606, 'epoch': 2} {'type': 'loss', 'content': 0.14451611042022705, 'timestamp': '2025-09-30 22:21:26.824694', 'step': 7607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:26.863881', 'step': 7607, 'epoch': 2} {'type': 'loss', 'content': 0.12877880036830902, 'timestamp': '2025-09-30 22:21:26.897268', 'step': 7608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:26.936772', 'step': 7608, 'epoch': 2} {'type': 'loss', 'content': 0.07977055758237839, 'timestamp': '2025-09-30 22:21:26.948834', 'step': 7609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:26.988470', 'step': 7609, 'epoch': 2} {'type': 'loss', 'content': 0.13622421026229858, 'timestamp': '2025-09-30 22:21:26.993188', 'step': 7610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:27.023916', 'step': 7610, 'epoch': 2} {'type': 'loss', 'content': 0.1082858070731163, 'timestamp': '2025-09-30 22:21:27.034597', 'step': 7611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:27.073062', 'step': 7611, 'epoch': 2} {'type': 'loss', 'content': 0.07629426568746567, 'timestamp': '2025-09-30 22:21:27.108800', 'step': 7612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:27.149335', 'step': 7612, 'epoch': 2} {'type': 'loss', 'content': 0.13758796453475952, 'timestamp': '2025-09-30 22:21:27.162003', 'step': 7613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:27.201856', 'step': 7613, 'epoch': 2} {'type': 'loss', 'content': 0.17389743030071259, 'timestamp': '2025-09-30 22:21:27.205474', 'step': 7614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:21:27.236696', 'step': 7614, 'epoch': 2} {'type': 'loss', 'content': 0.2596082091331482, 'timestamp': '2025-09-30 22:21:27.251472', 'step': 7615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:27.292049', 'step': 7615, 'epoch': 2} {'type': 'loss', 'content': 0.15062329173088074, 'timestamp': '2025-09-30 22:21:27.324467', 'step': 7616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:27.362074', 'step': 7616, 'epoch': 2} {'type': 'loss', 'content': 0.0817176029086113, 'timestamp': '2025-09-30 22:21:27.366270', 'step': 7617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:27.405543', 'step': 7617, 'epoch': 2} {'type': 'loss', 'content': 0.18618124723434448, 'timestamp': '2025-09-30 22:21:27.417747', 'step': 7618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:27.460116', 'step': 7618, 'epoch': 2} {'type': 'loss', 'content': 0.12600281834602356, 'timestamp': '2025-09-30 22:21:27.475393', 'step': 7619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:27.518192', 'step': 7619, 'epoch': 2} {'type': 'loss', 'content': 0.11885883659124374, 'timestamp': '2025-09-30 22:21:27.554397', 'step': 7620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:27.595047', 'step': 7620, 'epoch': 2} {'type': 'loss', 'content': 0.15184874832630157, 'timestamp': '2025-09-30 22:21:27.610703', 'step': 7621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:27.662114', 'step': 7621, 'epoch': 2} {'type': 'loss', 'content': 0.20482207834720612, 'timestamp': '2025-09-30 22:21:27.674523', 'step': 7622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:27.719000', 'step': 7622, 'epoch': 2} {'type': 'loss', 'content': 0.11144611984491348, 'timestamp': '2025-09-30 22:21:27.724029', 'step': 7623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:27.754829', 'step': 7623, 'epoch': 2} {'type': 'loss', 'content': 0.10696792602539062, 'timestamp': '2025-09-30 22:21:27.787607', 'step': 7624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:27.826108', 'step': 7624, 'epoch': 2} {'type': 'loss', 'content': 0.15352827310562134, 'timestamp': '2025-09-30 22:21:27.840325', 'step': 7625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:27.880766', 'step': 7625, 'epoch': 2} {'type': 'loss', 'content': 0.11144556105136871, 'timestamp': '2025-09-30 22:21:27.893221', 'step': 7626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:27.931839', 'step': 7626, 'epoch': 2} {'type': 'loss', 'content': 0.08312662690877914, 'timestamp': '2025-09-30 22:21:27.947727', 'step': 7627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:27.990480', 'step': 7627, 'epoch': 2} {'type': 'loss', 'content': 0.09620782732963562, 'timestamp': '2025-09-30 22:21:28.026758', 'step': 7628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:28.059898', 'step': 7628, 'epoch': 2} {'type': 'loss', 'content': 0.07477561384439468, 'timestamp': '2025-09-30 22:21:28.075205', 'step': 7629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:28.120396', 'step': 7629, 'epoch': 2} {'type': 'loss', 'content': 0.11066541075706482, 'timestamp': '2025-09-30 22:21:28.133165', 'step': 7630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:28.174095', 'step': 7630, 'epoch': 2} {'type': 'loss', 'content': 0.1015794575214386, 'timestamp': '2025-09-30 22:21:28.184464', 'step': 7631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:28.227630', 'step': 7631, 'epoch': 2} {'type': 'loss', 'content': 0.1869940459728241, 'timestamp': '2025-09-30 22:21:28.264487', 'step': 7632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:28.308132', 'step': 7632, 'epoch': 2} {'type': 'loss', 'content': 0.2203626036643982, 'timestamp': '2025-09-30 22:21:28.312905', 'step': 7633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:28.344444', 'step': 7633, 'epoch': 2} {'type': 'loss', 'content': 0.14981000125408173, 'timestamp': '2025-09-30 22:21:28.349820', 'step': 7634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:28.382913', 'step': 7634, 'epoch': 2} {'type': 'loss', 'content': 0.17096011340618134, 'timestamp': '2025-09-30 22:21:28.393762', 'step': 7635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:28.437283', 'step': 7635, 'epoch': 2} {'type': 'loss', 'content': 0.21719492971897125, 'timestamp': '2025-09-30 22:21:28.463803', 'step': 7636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:28.508786', 'step': 7636, 'epoch': 2} {'type': 'loss', 'content': 0.17476749420166016, 'timestamp': '2025-09-30 22:21:28.524660', 'step': 7637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:28.568756', 'step': 7637, 'epoch': 2} {'type': 'loss', 'content': 0.06193865090608597, 'timestamp': '2025-09-30 22:21:28.574097', 'step': 7638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:28.621143', 'step': 7638, 'epoch': 2} {'type': 'loss', 'content': 0.22218027710914612, 'timestamp': '2025-09-30 22:21:28.636511', 'step': 7639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:28.667877', 'step': 7639, 'epoch': 2} {'type': 'loss', 'content': 0.09976432472467422, 'timestamp': '2025-09-30 22:21:28.694187', 'step': 7640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:28.874777', 'step': 7640, 'epoch': 2} {'type': 'loss', 'content': 0.16380682587623596, 'timestamp': '2025-09-30 22:21:28.887248', 'step': 7641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:28.928116', 'step': 7641, 'epoch': 2} {'type': 'loss', 'content': 0.09984496235847473, 'timestamp': '2025-09-30 22:21:28.940826', 'step': 7642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:28.981693', 'step': 7642, 'epoch': 2} {'type': 'loss', 'content': 0.16989265382289886, 'timestamp': '2025-09-30 22:21:28.994518', 'step': 7643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:29.036993', 'step': 7643, 'epoch': 2} {'type': 'loss', 'content': 0.09235929697751999, 'timestamp': '2025-09-30 22:21:29.070460', 'step': 7644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:29.109470', 'step': 7644, 'epoch': 2} {'type': 'loss', 'content': 0.15261243283748627, 'timestamp': '2025-09-30 22:21:29.125216', 'step': 7645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:29.167657', 'step': 7645, 'epoch': 2} {'type': 'loss', 'content': 0.08209456503391266, 'timestamp': '2025-09-30 22:21:29.182512', 'step': 7646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:29.224228', 'step': 7646, 'epoch': 2} {'type': 'loss', 'content': 0.13014334440231323, 'timestamp': '2025-09-30 22:21:29.237985', 'step': 7647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:29.279613', 'step': 7647, 'epoch': 2} {'type': 'loss', 'content': 0.15304405987262726, 'timestamp': '2025-09-30 22:21:29.314503', 'step': 7648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:21:29.353467', 'step': 7648, 'epoch': 2} {'type': 'loss', 'content': 0.1740293651819229, 'timestamp': '2025-09-30 22:21:29.368794', 'step': 7649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:29.410836', 'step': 7649, 'epoch': 2} {'type': 'loss', 'content': 0.17785613238811493, 'timestamp': '2025-09-30 22:21:29.416042', 'step': 7650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:29.460396', 'step': 7650, 'epoch': 2} {'type': 'loss', 'content': 0.07034367322921753, 'timestamp': '2025-09-30 22:21:29.473551', 'step': 7651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:29.506317', 'step': 7651, 'epoch': 2} {'type': 'loss', 'content': 0.19604431092739105, 'timestamp': '2025-09-30 22:21:29.541965', 'step': 7652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:29.575461', 'step': 7652, 'epoch': 2} {'type': 'loss', 'content': 0.1452806442975998, 'timestamp': '2025-09-30 22:21:29.586775', 'step': 7653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:29.638781', 'step': 7653, 'epoch': 2} {'type': 'loss', 'content': 0.18167094886302948, 'timestamp': '2025-09-30 22:21:29.654169', 'step': 7654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:29.687489', 'step': 7654, 'epoch': 2} {'type': 'loss', 'content': 0.14945915341377258, 'timestamp': '2025-09-30 22:21:29.692126', 'step': 7655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:29.724605', 'step': 7655, 'epoch': 2} {'type': 'loss', 'content': 0.12016293406486511, 'timestamp': '2025-09-30 22:21:29.763113', 'step': 7656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:29.803733', 'step': 7656, 'epoch': 2} {'type': 'loss', 'content': 0.13144750893115997, 'timestamp': '2025-09-30 22:21:29.820699', 'step': 7657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:29.863191', 'step': 7657, 'epoch': 2} {'type': 'loss', 'content': 0.17834797501564026, 'timestamp': '2025-09-30 22:21:29.867368', 'step': 7658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:29.910158', 'step': 7658, 'epoch': 2} {'type': 'loss', 'content': 0.08516062796115875, 'timestamp': '2025-09-30 22:21:29.914212', 'step': 7659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:29.955130', 'step': 7659, 'epoch': 2} {'type': 'loss', 'content': 0.13585391640663147, 'timestamp': '2025-09-30 22:21:29.981700', 'step': 7660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:30.022153', 'step': 7660, 'epoch': 2} {'type': 'loss', 'content': 0.2201530635356903, 'timestamp': '2025-09-30 22:21:30.026202', 'step': 7661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:30.059548', 'step': 7661, 'epoch': 2} {'type': 'loss', 'content': 0.09572990238666534, 'timestamp': '2025-09-30 22:21:30.067486', 'step': 7662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:30.109381', 'step': 7662, 'epoch': 2} {'type': 'loss', 'content': 0.09758524596691132, 'timestamp': '2025-09-30 22:21:30.122154', 'step': 7663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.160322', 'step': 7663, 'epoch': 2} {'type': 'loss', 'content': 0.11785423755645752, 'timestamp': '2025-09-30 22:21:30.195425', 'step': 7664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:30.235906', 'step': 7664, 'epoch': 2} {'type': 'loss', 'content': 0.19048522412776947, 'timestamp': '2025-09-30 22:21:30.241727', 'step': 7665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:30.280253', 'step': 7665, 'epoch': 2} {'type': 'loss', 'content': 0.10530542582273483, 'timestamp': '2025-09-30 22:21:30.292716', 'step': 7666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:30.329942', 'step': 7666, 'epoch': 2} {'type': 'loss', 'content': 0.175365149974823, 'timestamp': '2025-09-30 22:21:30.343493', 'step': 7667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:30.381978', 'step': 7667, 'epoch': 2} {'type': 'loss', 'content': 0.15893925726413727, 'timestamp': '2025-09-30 22:21:30.415233', 'step': 7668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:30.452435', 'step': 7668, 'epoch': 2} {'type': 'loss', 'content': 0.14806586503982544, 'timestamp': '2025-09-30 22:21:30.463432', 'step': 7669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:30.503705', 'step': 7669, 'epoch': 2} {'type': 'loss', 'content': 0.27390187978744507, 'timestamp': '2025-09-30 22:21:30.507515', 'step': 7670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:30.539042', 'step': 7670, 'epoch': 2} {'type': 'loss', 'content': 0.12537361681461334, 'timestamp': '2025-09-30 22:21:30.542542', 'step': 7671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.580659', 'step': 7671, 'epoch': 2} {'type': 'loss', 'content': 0.16772469878196716, 'timestamp': '2025-09-30 22:21:30.613008', 'step': 7672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.645666', 'step': 7672, 'epoch': 2} {'type': 'loss', 'content': 0.11828099936246872, 'timestamp': '2025-09-30 22:21:30.658589', 'step': 7673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.708331', 'step': 7673, 'epoch': 2} {'type': 'loss', 'content': 0.22161777317523956, 'timestamp': '2025-09-30 22:21:30.721959', 'step': 7674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:30.761788', 'step': 7674, 'epoch': 2} {'type': 'loss', 'content': 0.11465495079755783, 'timestamp': '2025-09-30 22:21:30.771428', 'step': 7675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.814562', 'step': 7675, 'epoch': 2} {'type': 'loss', 'content': 0.20341303944587708, 'timestamp': '2025-09-30 22:21:30.842475', 'step': 7676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.873714', 'step': 7676, 'epoch': 2} {'type': 'loss', 'content': 0.16564854979515076, 'timestamp': '2025-09-30 22:21:30.877064', 'step': 7677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:30.916155', 'step': 7677, 'epoch': 2} {'type': 'loss', 'content': 0.13676071166992188, 'timestamp': '2025-09-30 22:21:30.930704', 'step': 7678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:30.980159', 'step': 7678, 'epoch': 2} {'type': 'loss', 'content': 0.19950062036514282, 'timestamp': '2025-09-30 22:21:30.992699', 'step': 7679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.031121', 'step': 7679, 'epoch': 2} {'type': 'loss', 'content': 0.13344989717006683, 'timestamp': '2025-09-30 22:21:31.065355', 'step': 7680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.103931', 'step': 7680, 'epoch': 2} {'type': 'loss', 'content': 0.1788424402475357, 'timestamp': '2025-09-30 22:21:31.115492', 'step': 7681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:31.154576', 'step': 7681, 'epoch': 2} {'type': 'loss', 'content': 0.0761115550994873, 'timestamp': '2025-09-30 22:21:31.158262', 'step': 7682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.197466', 'step': 7682, 'epoch': 2} {'type': 'loss', 'content': 0.19155649840831757, 'timestamp': '2025-09-30 22:21:31.209049', 'step': 7683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:31.255511', 'step': 7683, 'epoch': 2} {'type': 'loss', 'content': 0.0910983756184578, 'timestamp': '2025-09-30 22:21:31.286962', 'step': 7684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.327122', 'step': 7684, 'epoch': 2} {'type': 'loss', 'content': 0.10117308050394058, 'timestamp': '2025-09-30 22:21:31.336803', 'step': 7685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.368174', 'step': 7685, 'epoch': 2} {'type': 'loss', 'content': 0.16886238753795624, 'timestamp': '2025-09-30 22:21:31.379199', 'step': 7686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:31.411124', 'step': 7686, 'epoch': 2} {'type': 'loss', 'content': 0.16955377161502838, 'timestamp': '2025-09-30 22:21:31.418243', 'step': 7687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:31.450272', 'step': 7687, 'epoch': 2} {'type': 'loss', 'content': 0.12180746346712112, 'timestamp': '2025-09-30 22:21:31.475148', 'step': 7688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.517540', 'step': 7688, 'epoch': 2} {'type': 'loss', 'content': 0.19953365623950958, 'timestamp': '2025-09-30 22:21:31.521491', 'step': 7689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:31.559645', 'step': 7689, 'epoch': 2} {'type': 'loss', 'content': 0.07791435718536377, 'timestamp': '2025-09-30 22:21:31.564312', 'step': 7690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.595634', 'step': 7690, 'epoch': 2} {'type': 'loss', 'content': 0.12281718105077744, 'timestamp': '2025-09-30 22:21:31.610234', 'step': 7691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.651176', 'step': 7691, 'epoch': 2} {'type': 'loss', 'content': 0.2283630520105362, 'timestamp': '2025-09-30 22:21:31.676570', 'step': 7692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:31.710754', 'step': 7692, 'epoch': 2} {'type': 'loss', 'content': 0.12516149878501892, 'timestamp': '2025-09-30 22:21:31.715330', 'step': 7693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:31.755286', 'step': 7693, 'epoch': 2} {'type': 'loss', 'content': 0.12787234783172607, 'timestamp': '2025-09-30 22:21:31.763455', 'step': 7694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:31.799475', 'step': 7694, 'epoch': 2} {'type': 'loss', 'content': 0.2129843384027481, 'timestamp': '2025-09-30 22:21:31.807838', 'step': 7695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:31.849043', 'step': 7695, 'epoch': 2} {'type': 'loss', 'content': 0.07755588740110397, 'timestamp': '2025-09-30 22:21:31.882508', 'step': 7696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.914543', 'step': 7696, 'epoch': 2} {'type': 'loss', 'content': 0.17382997274398804, 'timestamp': '2025-09-30 22:21:31.928078', 'step': 7697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:31.961087', 'step': 7697, 'epoch': 2} {'type': 'loss', 'content': 0.23378825187683105, 'timestamp': '2025-09-30 22:21:31.967057', 'step': 7698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.001259', 'step': 7698, 'epoch': 2} {'type': 'loss', 'content': 0.15636245906352997, 'timestamp': '2025-09-30 22:21:32.012704', 'step': 7699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:32.051168', 'step': 7699, 'epoch': 2} {'type': 'loss', 'content': 0.06230897083878517, 'timestamp': '2025-09-30 22:21:32.088620', 'step': 7700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.122573', 'step': 7700, 'epoch': 2} {'type': 'loss', 'content': 0.19698619842529297, 'timestamp': '2025-09-30 22:21:32.126777', 'step': 7701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.163076', 'step': 7701, 'epoch': 2} {'type': 'loss', 'content': 0.07376415282487869, 'timestamp': '2025-09-30 22:21:32.167838', 'step': 7702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.204894', 'step': 7702, 'epoch': 2} {'type': 'loss', 'content': 0.2737869322299957, 'timestamp': '2025-09-30 22:21:32.209429', 'step': 7703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:32.249754', 'step': 7703, 'epoch': 2} {'type': 'loss', 'content': 0.1593942940235138, 'timestamp': '2025-09-30 22:21:32.277742', 'step': 7704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.319409', 'step': 7704, 'epoch': 2} {'type': 'loss', 'content': 0.1917765736579895, 'timestamp': '2025-09-30 22:21:32.333194', 'step': 7705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.371745', 'step': 7705, 'epoch': 2} {'type': 'loss', 'content': 0.2264634221792221, 'timestamp': '2025-09-30 22:21:32.383568', 'step': 7706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.428957', 'step': 7706, 'epoch': 2} {'type': 'loss', 'content': 0.06480009108781815, 'timestamp': '2025-09-30 22:21:32.437353', 'step': 7707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.475435', 'step': 7707, 'epoch': 2} {'type': 'loss', 'content': 0.1033298671245575, 'timestamp': '2025-09-30 22:21:32.508896', 'step': 7708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:32.541876', 'step': 7708, 'epoch': 2} {'type': 'loss', 'content': 0.128688782453537, 'timestamp': '2025-09-30 22:21:32.546338', 'step': 7709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:32.580283', 'step': 7709, 'epoch': 2} {'type': 'loss', 'content': 0.1371399313211441, 'timestamp': '2025-09-30 22:21:32.593655', 'step': 7710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.625745', 'step': 7710, 'epoch': 2} {'type': 'loss', 'content': 0.08428581804037094, 'timestamp': '2025-09-30 22:21:32.629560', 'step': 7711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.671873', 'step': 7711, 'epoch': 2} {'type': 'loss', 'content': 0.15137475728988647, 'timestamp': '2025-09-30 22:21:32.705269', 'step': 7712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.744548', 'step': 7712, 'epoch': 2} {'type': 'loss', 'content': 0.1954280138015747, 'timestamp': '2025-09-30 22:21:32.749820', 'step': 7713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.781923', 'step': 7713, 'epoch': 2} {'type': 'loss', 'content': 0.1191970556974411, 'timestamp': '2025-09-30 22:21:32.793183', 'step': 7714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:32.825182', 'step': 7714, 'epoch': 2} {'type': 'loss', 'content': 0.10042361915111542, 'timestamp': '2025-09-30 22:21:32.835943', 'step': 7715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:32.877896', 'step': 7715, 'epoch': 2} {'type': 'loss', 'content': 0.080617755651474, 'timestamp': '2025-09-30 22:21:32.912421', 'step': 7716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:32.965175', 'step': 7716, 'epoch': 2} {'type': 'loss', 'content': 0.14686071872711182, 'timestamp': '2025-09-30 22:21:32.968918', 'step': 7717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.007933', 'step': 7717, 'epoch': 2} {'type': 'loss', 'content': 0.10018053650856018, 'timestamp': '2025-09-30 22:21:33.021127', 'step': 7718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:33.077021', 'step': 7718, 'epoch': 2} {'type': 'loss', 'content': 0.10560614615678787, 'timestamp': '2025-09-30 22:21:33.090100', 'step': 7719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.133764', 'step': 7719, 'epoch': 2} {'type': 'loss', 'content': 0.1912764012813568, 'timestamp': '2025-09-30 22:21:33.169865', 'step': 7720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.237388', 'step': 7720, 'epoch': 2} {'type': 'loss', 'content': 0.13755382597446442, 'timestamp': '2025-09-30 22:21:33.256874', 'step': 7721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:33.311470', 'step': 7721, 'epoch': 2} {'type': 'loss', 'content': 0.07671016454696655, 'timestamp': '2025-09-30 22:21:33.326849', 'step': 7722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.377441', 'step': 7722, 'epoch': 2} {'type': 'loss', 'content': 0.13745597004890442, 'timestamp': '2025-09-30 22:21:33.395758', 'step': 7723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:21:33.456518', 'step': 7723, 'epoch': 2} {'type': 'loss', 'content': 0.147008016705513, 'timestamp': '2025-09-30 22:21:33.488100', 'step': 7724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.531295', 'step': 7724, 'epoch': 2} {'type': 'loss', 'content': 0.14122286438941956, 'timestamp': '2025-09-30 22:21:33.535003', 'step': 7725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:33.575385', 'step': 7725, 'epoch': 2} {'type': 'loss', 'content': 0.09789488464593887, 'timestamp': '2025-09-30 22:21:33.587876', 'step': 7726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.632477', 'step': 7726, 'epoch': 2} {'type': 'loss', 'content': 0.09684624522924423, 'timestamp': '2025-09-30 22:21:33.646010', 'step': 7727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:33.695828', 'step': 7727, 'epoch': 2} {'type': 'loss', 'content': 0.17929376661777496, 'timestamp': '2025-09-30 22:21:33.720847', 'step': 7728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:33.814611', 'step': 7728, 'epoch': 2} {'type': 'loss', 'content': 0.11295710504055023, 'timestamp': '2025-09-30 22:21:33.827809', 'step': 7729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:33.880162', 'step': 7729, 'epoch': 2} {'type': 'loss', 'content': 0.07255922257900238, 'timestamp': '2025-09-30 22:21:33.888527', 'step': 7730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:33.933094', 'step': 7730, 'epoch': 2} {'type': 'loss', 'content': 0.2094344049692154, 'timestamp': '2025-09-30 22:21:33.944484', 'step': 7731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.010694', 'step': 7731, 'epoch': 2} {'type': 'loss', 'content': 0.1898127794265747, 'timestamp': '2025-09-30 22:21:34.044310', 'step': 7732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:34.091242', 'step': 7732, 'epoch': 2} {'type': 'loss', 'content': 0.09127107262611389, 'timestamp': '2025-09-30 22:21:34.103539', 'step': 7733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.155923', 'step': 7733, 'epoch': 2} {'type': 'loss', 'content': 0.10235600918531418, 'timestamp': '2025-09-30 22:21:34.167909', 'step': 7734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:34.211646', 'step': 7734, 'epoch': 2} {'type': 'loss', 'content': 0.10442563146352768, 'timestamp': '2025-09-30 22:21:34.225477', 'step': 7735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:34.282827', 'step': 7735, 'epoch': 2} {'type': 'loss', 'content': 0.08389879763126373, 'timestamp': '2025-09-30 22:21:34.313402', 'step': 7736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.355555', 'step': 7736, 'epoch': 2} {'type': 'loss', 'content': 0.1398399919271469, 'timestamp': '2025-09-30 22:21:34.366582', 'step': 7737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:34.406002', 'step': 7737, 'epoch': 2} {'type': 'loss', 'content': 0.12743309140205383, 'timestamp': '2025-09-30 22:21:34.418413', 'step': 7738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:34.458429', 'step': 7738, 'epoch': 2} {'type': 'loss', 'content': 0.12881754338741302, 'timestamp': '2025-09-30 22:21:34.470551', 'step': 7739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.508987', 'step': 7739, 'epoch': 2} {'type': 'loss', 'content': 0.16303962469100952, 'timestamp': '2025-09-30 22:21:34.538842', 'step': 7740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:34.578483', 'step': 7740, 'epoch': 2} {'type': 'loss', 'content': 0.06018013879656792, 'timestamp': '2025-09-30 22:21:34.582362', 'step': 7741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.616338', 'step': 7741, 'epoch': 2} {'type': 'loss', 'content': 0.11784616857767105, 'timestamp': '2025-09-30 22:21:34.620788', 'step': 7742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:34.661348', 'step': 7742, 'epoch': 2} {'type': 'loss', 'content': 0.08775773644447327, 'timestamp': '2025-09-30 22:21:34.666106', 'step': 7743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:34.707804', 'step': 7743, 'epoch': 2} {'type': 'loss', 'content': 0.06047132983803749, 'timestamp': '2025-09-30 22:21:34.741558', 'step': 7744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:34.776022', 'step': 7744, 'epoch': 2} {'type': 'loss', 'content': 0.15155136585235596, 'timestamp': '2025-09-30 22:21:34.779299', 'step': 7745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:34.818867', 'step': 7745, 'epoch': 2} {'type': 'loss', 'content': 0.21421121060848236, 'timestamp': '2025-09-30 22:21:34.829104', 'step': 7746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.870070', 'step': 7746, 'epoch': 2} {'type': 'loss', 'content': 0.225539892911911, 'timestamp': '2025-09-30 22:21:34.882584', 'step': 7747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:34.922939', 'step': 7747, 'epoch': 2} {'type': 'loss', 'content': 0.2488614171743393, 'timestamp': '2025-09-30 22:21:34.955614', 'step': 7748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:34.993321', 'step': 7748, 'epoch': 2} {'type': 'loss', 'content': 0.1888279765844345, 'timestamp': '2025-09-30 22:21:35.004758', 'step': 7749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:35.036316', 'step': 7749, 'epoch': 2} {'type': 'loss', 'content': 0.10448333621025085, 'timestamp': '2025-09-30 22:21:35.049196', 'step': 7750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:35.090542', 'step': 7750, 'epoch': 2} {'type': 'loss', 'content': 0.09600735455751419, 'timestamp': '2025-09-30 22:21:35.094047', 'step': 7751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:35.127149', 'step': 7751, 'epoch': 2} {'type': 'loss', 'content': 0.20850419998168945, 'timestamp': '2025-09-30 22:21:35.154490', 'step': 7752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:35.196180', 'step': 7752, 'epoch': 2} {'type': 'loss', 'content': 0.19776307046413422, 'timestamp': '2025-09-30 22:21:35.200793', 'step': 7753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.231231', 'step': 7753, 'epoch': 2} {'type': 'loss', 'content': 0.1671377420425415, 'timestamp': '2025-09-30 22:21:35.245920', 'step': 7754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:35.285444', 'step': 7754, 'epoch': 2} {'type': 'loss', 'content': 0.15167315304279327, 'timestamp': '2025-09-30 22:21:35.301035', 'step': 7755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.340941', 'step': 7755, 'epoch': 2} {'type': 'loss', 'content': 0.09038453549146652, 'timestamp': '2025-09-30 22:21:35.376085', 'step': 7756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:35.413834', 'step': 7756, 'epoch': 2} {'type': 'loss', 'content': 0.12776978313922882, 'timestamp': '2025-09-30 22:21:35.425833', 'step': 7757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.464558', 'step': 7757, 'epoch': 2} {'type': 'loss', 'content': 0.11400151252746582, 'timestamp': '2025-09-30 22:21:35.477786', 'step': 7758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:35.518814', 'step': 7758, 'epoch': 2} {'type': 'loss', 'content': 0.10178915411233902, 'timestamp': '2025-09-30 22:21:35.531798', 'step': 7759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.571251', 'step': 7759, 'epoch': 2} {'type': 'loss', 'content': 0.14438021183013916, 'timestamp': '2025-09-30 22:21:35.596735', 'step': 7760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.640557', 'step': 7760, 'epoch': 2} {'type': 'loss', 'content': 0.18468573689460754, 'timestamp': '2025-09-30 22:21:35.643719', 'step': 7761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:35.684772', 'step': 7761, 'epoch': 2} {'type': 'loss', 'content': 0.15438736975193024, 'timestamp': '2025-09-30 22:21:35.689724', 'step': 7762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:35.729846', 'step': 7762, 'epoch': 2} {'type': 'loss', 'content': 0.12514911592006683, 'timestamp': '2025-09-30 22:21:35.740440', 'step': 7763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.784058', 'step': 7763, 'epoch': 2} {'type': 'loss', 'content': 0.13892611861228943, 'timestamp': '2025-09-30 22:21:35.819519', 'step': 7764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-30 22:21:35.857189', 'step': 7764, 'epoch': 2} {'type': 'loss', 'content': 0.1132320985198021, 'timestamp': '2025-09-30 22:21:35.873095', 'step': 7765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:35.905221', 'step': 7765, 'epoch': 2} {'type': 'loss', 'content': 0.18918491899967194, 'timestamp': '2025-09-30 22:21:35.919349', 'step': 7766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:35.958106', 'step': 7766, 'epoch': 2} {'type': 'loss', 'content': 0.13964271545410156, 'timestamp': '2025-09-30 22:21:35.968887', 'step': 7767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:36.009904', 'step': 7767, 'epoch': 2} {'type': 'loss', 'content': 0.10425012558698654, 'timestamp': '2025-09-30 22:21:36.045786', 'step': 7768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.078190', 'step': 7768, 'epoch': 2} {'type': 'loss', 'content': 0.12584826350212097, 'timestamp': '2025-09-30 22:21:36.082325', 'step': 7769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:36.120704', 'step': 7769, 'epoch': 2} {'type': 'loss', 'content': 0.1548772156238556, 'timestamp': '2025-09-30 22:21:36.126076', 'step': 7770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.157942', 'step': 7770, 'epoch': 2} {'type': 'loss', 'content': 0.18168514966964722, 'timestamp': '2025-09-30 22:21:36.169038', 'step': 7771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:36.208934', 'step': 7771, 'epoch': 2} {'type': 'loss', 'content': 0.08781063556671143, 'timestamp': '2025-09-30 22:21:36.234676', 'step': 7772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.266592', 'step': 7772, 'epoch': 2} {'type': 'loss', 'content': 0.06886758655309677, 'timestamp': '2025-09-30 22:21:36.280740', 'step': 7773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.312574', 'step': 7773, 'epoch': 2} {'type': 'loss', 'content': 0.12070342898368835, 'timestamp': '2025-09-30 22:21:36.315988', 'step': 7774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:36.358241', 'step': 7774, 'epoch': 2} {'type': 'loss', 'content': 0.10940416902303696, 'timestamp': '2025-09-30 22:21:36.368930', 'step': 7775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:36.409065', 'step': 7775, 'epoch': 2} {'type': 'loss', 'content': 0.09479070454835892, 'timestamp': '2025-09-30 22:21:36.441052', 'step': 7776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:36.473963', 'step': 7776, 'epoch': 2} {'type': 'loss', 'content': 0.09421766549348831, 'timestamp': '2025-09-30 22:21:36.487040', 'step': 7777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:36.525765', 'step': 7777, 'epoch': 2} {'type': 'loss', 'content': 0.10888229310512543, 'timestamp': '2025-09-30 22:21:36.537896', 'step': 7778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.577088', 'step': 7778, 'epoch': 2} {'type': 'loss', 'content': 0.16767027974128723, 'timestamp': '2025-09-30 22:21:36.582225', 'step': 7779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.621346', 'step': 7779, 'epoch': 2} {'type': 'loss', 'content': 0.14836613833904266, 'timestamp': '2025-09-30 22:21:36.657975', 'step': 7780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:36.699480', 'step': 7780, 'epoch': 2} {'type': 'loss', 'content': 0.21869391202926636, 'timestamp': '2025-09-30 22:21:36.713211', 'step': 7781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.754322', 'step': 7781, 'epoch': 2} {'type': 'loss', 'content': 0.12233410775661469, 'timestamp': '2025-09-30 22:21:36.766454', 'step': 7782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:36.807678', 'step': 7782, 'epoch': 2} {'type': 'loss', 'content': 0.1115555539727211, 'timestamp': '2025-09-30 22:21:36.821878', 'step': 7783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:36.862979', 'step': 7783, 'epoch': 2} {'type': 'loss', 'content': 0.085224948823452, 'timestamp': '2025-09-30 22:21:36.897520', 'step': 7784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:36.937417', 'step': 7784, 'epoch': 2} {'type': 'loss', 'content': 0.09810178726911545, 'timestamp': '2025-09-30 22:21:36.952054', 'step': 7785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:36.990754', 'step': 7785, 'epoch': 2} {'type': 'loss', 'content': 0.19415578246116638, 'timestamp': '2025-09-30 22:21:37.004238', 'step': 7786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:37.048492', 'step': 7786, 'epoch': 2} {'type': 'loss', 'content': 0.11033836007118225, 'timestamp': '2025-09-30 22:21:37.065386', 'step': 7787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:37.109703', 'step': 7787, 'epoch': 2} {'type': 'loss', 'content': 0.1654941439628601, 'timestamp': '2025-09-30 22:21:37.137033', 'step': 7788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:37.168751', 'step': 7788, 'epoch': 2} {'type': 'loss', 'content': 0.15959595143795013, 'timestamp': '2025-09-30 22:21:37.173684', 'step': 7789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:37.204784', 'step': 7789, 'epoch': 2} {'type': 'loss', 'content': 0.07527200877666473, 'timestamp': '2025-09-30 22:21:37.208957', 'step': 7790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:37.241977', 'step': 7790, 'epoch': 2} {'type': 'loss', 'content': 0.17400501668453217, 'timestamp': '2025-09-30 22:21:37.256889', 'step': 7791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:37.302731', 'step': 7791, 'epoch': 2} {'type': 'loss', 'content': 0.17855583131313324, 'timestamp': '2025-09-30 22:21:37.329555', 'step': 7792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:37.370739', 'step': 7792, 'epoch': 2} {'type': 'loss', 'content': 0.09869436174631119, 'timestamp': '2025-09-30 22:21:37.373530', 'step': 7793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:37.405790', 'step': 7793, 'epoch': 2} {'type': 'loss', 'content': 0.17102958261966705, 'timestamp': '2025-09-30 22:21:37.420552', 'step': 7794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:37.468245', 'step': 7794, 'epoch': 2} {'type': 'loss', 'content': 0.07417994737625122, 'timestamp': '2025-09-30 22:21:37.474622', 'step': 7795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:37.518470', 'step': 7795, 'epoch': 2} {'type': 'loss', 'content': 0.12159930169582367, 'timestamp': '2025-09-30 22:21:37.545105', 'step': 7796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:37.576576', 'step': 7796, 'epoch': 2} {'type': 'loss', 'content': 0.14752912521362305, 'timestamp': '2025-09-30 22:21:37.582491', 'step': 7797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:37.614209', 'step': 7797, 'epoch': 2} {'type': 'loss', 'content': 0.0910550057888031, 'timestamp': '2025-09-30 22:21:37.632871', 'step': 7798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:37.682828', 'step': 7798, 'epoch': 2} {'type': 'loss', 'content': 0.1309734731912613, 'timestamp': '2025-09-30 22:21:37.688406', 'step': 7799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:37.739008', 'step': 7799, 'epoch': 2} {'type': 'loss', 'content': 0.13344085216522217, 'timestamp': '2025-09-30 22:21:37.781489', 'step': 7800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:37.828725', 'step': 7800, 'epoch': 2} {'type': 'loss', 'content': 0.12636668980121613, 'timestamp': '2025-09-30 22:21:37.849520', 'step': 7801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:37.895477', 'step': 7801, 'epoch': 2} {'type': 'loss', 'content': 0.1350388079881668, 'timestamp': '2025-09-30 22:21:37.914344', 'step': 7802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:37.962735', 'step': 7802, 'epoch': 2} {'type': 'loss', 'content': 0.1097068041563034, 'timestamp': '2025-09-30 22:21:37.970905', 'step': 7803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:38.015826', 'step': 7803, 'epoch': 2} {'type': 'loss', 'content': 0.11938076466321945, 'timestamp': '2025-09-30 22:21:38.041435', 'step': 7804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:38.074662', 'step': 7804, 'epoch': 2} {'type': 'loss', 'content': 0.19024667143821716, 'timestamp': '2025-09-30 22:21:38.079040', 'step': 7805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:38.122611', 'step': 7805, 'epoch': 2} {'type': 'loss', 'content': 0.20386898517608643, 'timestamp': '2025-09-30 22:21:38.137918', 'step': 7806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:38.180165', 'step': 7806, 'epoch': 2} {'type': 'loss', 'content': 0.1833634078502655, 'timestamp': '2025-09-30 22:21:38.195429', 'step': 7807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:38.236662', 'step': 7807, 'epoch': 2} {'type': 'loss', 'content': 0.09102092683315277, 'timestamp': '2025-09-30 22:21:38.271121', 'step': 7808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:38.303452', 'step': 7808, 'epoch': 2} {'type': 'loss', 'content': 0.12218382209539413, 'timestamp': '2025-09-30 22:21:38.318504', 'step': 7809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:38.359268', 'step': 7809, 'epoch': 2} {'type': 'loss', 'content': 0.08577483892440796, 'timestamp': '2025-09-30 22:21:38.369245', 'step': 7810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:38.412220', 'step': 7810, 'epoch': 2} {'type': 'loss', 'content': 0.13086041808128357, 'timestamp': '2025-09-30 22:21:38.430269', 'step': 7811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:38.475487', 'step': 7811, 'epoch': 2} {'type': 'loss', 'content': 0.16858109831809998, 'timestamp': '2025-09-30 22:21:38.509170', 'step': 7812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:38.544998', 'step': 7812, 'epoch': 2} {'type': 'loss', 'content': 0.06444333493709564, 'timestamp': '2025-09-30 22:21:38.554356', 'step': 7813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:38.591052', 'step': 7813, 'epoch': 2} {'type': 'loss', 'content': 0.07426796853542328, 'timestamp': '2025-09-30 22:21:38.600202', 'step': 7814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:38.637424', 'step': 7814, 'epoch': 2} {'type': 'loss', 'content': 0.06661325693130493, 'timestamp': '2025-09-30 22:21:38.644838', 'step': 7815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:38.680504', 'step': 7815, 'epoch': 2} {'type': 'loss', 'content': 0.2678709924221039, 'timestamp': '2025-09-30 22:21:38.711903', 'step': 7816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:38.751390', 'step': 7816, 'epoch': 2} {'type': 'loss', 'content': 0.14830172061920166, 'timestamp': '2025-09-30 22:21:38.766112', 'step': 7817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:38.809033', 'step': 7817, 'epoch': 2} {'type': 'loss', 'content': 0.05931783467531204, 'timestamp': '2025-09-30 22:21:38.824124', 'step': 7818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:38.865505', 'step': 7818, 'epoch': 2} {'type': 'loss', 'content': 0.08743550628423691, 'timestamp': '2025-09-30 22:21:38.879513', 'step': 7819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:38.922169', 'step': 7819, 'epoch': 2} {'type': 'loss', 'content': 0.19148066639900208, 'timestamp': '2025-09-30 22:21:38.958347', 'step': 7820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:39.002669', 'step': 7820, 'epoch': 2} {'type': 'loss', 'content': 0.07288981229066849, 'timestamp': '2025-09-30 22:21:39.021985', 'step': 7821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.067059', 'step': 7821, 'epoch': 2} {'type': 'loss', 'content': 0.09402799606323242, 'timestamp': '2025-09-30 22:21:39.086804', 'step': 7822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:39.136156', 'step': 7822, 'epoch': 2} {'type': 'loss', 'content': 0.06765522807836533, 'timestamp': '2025-09-30 22:21:39.157055', 'step': 7823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:39.216556', 'step': 7823, 'epoch': 2} {'type': 'loss', 'content': 0.10595965385437012, 'timestamp': '2025-09-30 22:21:39.261624', 'step': 7824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:39.311389', 'step': 7824, 'epoch': 2} {'type': 'loss', 'content': 0.2055974006652832, 'timestamp': '2025-09-30 22:21:39.332401', 'step': 7825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.381042', 'step': 7825, 'epoch': 2} {'type': 'loss', 'content': 0.11502441018819809, 'timestamp': '2025-09-30 22:21:39.400726', 'step': 7826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:39.434505', 'step': 7826, 'epoch': 2} {'type': 'loss', 'content': 0.28713458776474, 'timestamp': '2025-09-30 22:21:39.452260', 'step': 7827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.496514', 'step': 7827, 'epoch': 2} {'type': 'loss', 'content': 0.1080654188990593, 'timestamp': '2025-09-30 22:21:39.536517', 'step': 7828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:39.580505', 'step': 7828, 'epoch': 2} {'type': 'loss', 'content': 0.18939520418643951, 'timestamp': '2025-09-30 22:21:39.596334', 'step': 7829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.628292', 'step': 7829, 'epoch': 2} {'type': 'loss', 'content': 0.05577394366264343, 'timestamp': '2025-09-30 22:21:39.634381', 'step': 7830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:39.664947', 'step': 7830, 'epoch': 2} {'type': 'loss', 'content': 0.1279384195804596, 'timestamp': '2025-09-30 22:21:39.668766', 'step': 7831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:39.699935', 'step': 7831, 'epoch': 2} {'type': 'loss', 'content': 0.18214458227157593, 'timestamp': '2025-09-30 22:21:39.725863', 'step': 7832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:21:39.757437', 'step': 7832, 'epoch': 2} {'type': 'loss', 'content': 0.11271800845861435, 'timestamp': '2025-09-30 22:21:39.774394', 'step': 7833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.820097', 'step': 7833, 'epoch': 2} {'type': 'loss', 'content': 0.13564538955688477, 'timestamp': '2025-09-30 22:21:39.826796', 'step': 7834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.868644', 'step': 7834, 'epoch': 2} {'type': 'loss', 'content': 0.12518493831157684, 'timestamp': '2025-09-30 22:21:39.882344', 'step': 7835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:39.922585', 'step': 7835, 'epoch': 2} {'type': 'loss', 'content': 0.08831735700368881, 'timestamp': '2025-09-30 22:21:39.950394', 'step': 7836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:39.996717', 'step': 7836, 'epoch': 2} {'type': 'loss', 'content': 0.1391257494688034, 'timestamp': '2025-09-30 22:21:40.011985', 'step': 7837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:40.053502', 'step': 7837, 'epoch': 2} {'type': 'loss', 'content': 0.09693149477243423, 'timestamp': '2025-09-30 22:21:40.069975', 'step': 7838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:40.113236', 'step': 7838, 'epoch': 2} {'type': 'loss', 'content': 0.1430412083864212, 'timestamp': '2025-09-30 22:21:40.129525', 'step': 7839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:40.174766', 'step': 7839, 'epoch': 2} {'type': 'loss', 'content': 0.24113047122955322, 'timestamp': '2025-09-30 22:21:40.211615', 'step': 7840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:40.256688', 'step': 7840, 'epoch': 2} {'type': 'loss', 'content': 0.14845745265483856, 'timestamp': '2025-09-30 22:21:40.273306', 'step': 7841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:40.318266', 'step': 7841, 'epoch': 2} {'type': 'loss', 'content': 0.27199023962020874, 'timestamp': '2025-09-30 22:21:40.333124', 'step': 7842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:40.375542', 'step': 7842, 'epoch': 2} {'type': 'loss', 'content': 0.062158141285181046, 'timestamp': '2025-09-30 22:21:40.389519', 'step': 7843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:40.430160', 'step': 7843, 'epoch': 2} {'type': 'loss', 'content': 0.11631867289543152, 'timestamp': '2025-09-30 22:21:40.466536', 'step': 7844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:40.507042', 'step': 7844, 'epoch': 2} {'type': 'loss', 'content': 0.14673276245594025, 'timestamp': '2025-09-30 22:21:40.522866', 'step': 7845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:40.565576', 'step': 7845, 'epoch': 2} {'type': 'loss', 'content': 0.0844314843416214, 'timestamp': '2025-09-30 22:21:40.582243', 'step': 7846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:40.623321', 'step': 7846, 'epoch': 2} {'type': 'loss', 'content': 0.0556156150996685, 'timestamp': '2025-09-30 22:21:40.636631', 'step': 7847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:40.677169', 'step': 7847, 'epoch': 2} {'type': 'loss', 'content': 0.17194990813732147, 'timestamp': '2025-09-30 22:21:40.712602', 'step': 7848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:40.758508', 'step': 7848, 'epoch': 2} {'type': 'loss', 'content': 0.17985394597053528, 'timestamp': '2025-09-30 22:21:40.770835', 'step': 7849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:40.810217', 'step': 7849, 'epoch': 2} {'type': 'loss', 'content': 0.11701515316963196, 'timestamp': '2025-09-30 22:21:40.824202', 'step': 7850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:40.864983', 'step': 7850, 'epoch': 2} {'type': 'loss', 'content': 0.14977166056632996, 'timestamp': '2025-09-30 22:21:40.869912', 'step': 7851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:40.911347', 'step': 7851, 'epoch': 2} {'type': 'loss', 'content': 0.14310353994369507, 'timestamp': '2025-09-30 22:21:40.937707', 'step': 7852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:40.981342', 'step': 7852, 'epoch': 2} {'type': 'loss', 'content': 0.09718815237283707, 'timestamp': '2025-09-30 22:21:40.998082', 'step': 7853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:41.040853', 'step': 7853, 'epoch': 2} {'type': 'loss', 'content': 0.08422420173883438, 'timestamp': '2025-09-30 22:21:41.061392', 'step': 7854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:41.118742', 'step': 7854, 'epoch': 2} {'type': 'loss', 'content': 0.21281632781028748, 'timestamp': '2025-09-30 22:21:41.138954', 'step': 7855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:41.186536', 'step': 7855, 'epoch': 2} {'type': 'loss', 'content': 0.10242422670125961, 'timestamp': '2025-09-30 22:21:41.230549', 'step': 7856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:41.276159', 'step': 7856, 'epoch': 2} {'type': 'loss', 'content': 0.10222067683935165, 'timestamp': '2025-09-30 22:21:41.280728', 'step': 7857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:41.313378', 'step': 7857, 'epoch': 2} {'type': 'loss', 'content': 0.24362444877624512, 'timestamp': '2025-09-30 22:21:41.333435', 'step': 7858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:41.383536', 'step': 7858, 'epoch': 2} {'type': 'loss', 'content': 0.12611544132232666, 'timestamp': '2025-09-30 22:21:41.404106', 'step': 7859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:41.453612', 'step': 7859, 'epoch': 2} {'type': 'loss', 'content': 0.12407136708498001, 'timestamp': '2025-09-30 22:21:41.494499', 'step': 7860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:41.541506', 'step': 7860, 'epoch': 2} {'type': 'loss', 'content': 0.1694795787334442, 'timestamp': '2025-09-30 22:21:41.544986', 'step': 7861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:41.577180', 'step': 7861, 'epoch': 2} {'type': 'loss', 'content': 0.10564202815294266, 'timestamp': '2025-09-30 22:21:41.581420', 'step': 7862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:41.619291', 'step': 7862, 'epoch': 2} {'type': 'loss', 'content': 0.1377585083246231, 'timestamp': '2025-09-30 22:21:41.635100', 'step': 7863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:41.676611', 'step': 7863, 'epoch': 2} {'type': 'loss', 'content': 0.08281019330024719, 'timestamp': '2025-09-30 22:21:41.711374', 'step': 7864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:41.744189', 'step': 7864, 'epoch': 2} {'type': 'loss', 'content': 0.17405714094638824, 'timestamp': '2025-09-30 22:21:41.748298', 'step': 7865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:41.780272', 'step': 7865, 'epoch': 2} {'type': 'loss', 'content': 0.10323309898376465, 'timestamp': '2025-09-30 22:21:41.783137', 'step': 7866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:41.816052', 'step': 7866, 'epoch': 2} {'type': 'loss', 'content': 0.07154116034507751, 'timestamp': '2025-09-30 22:21:41.820120', 'step': 7867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:41.852184', 'step': 7867, 'epoch': 2} {'type': 'loss', 'content': 0.10898394882678986, 'timestamp': '2025-09-30 22:21:41.877121', 'step': 7868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:41.910039', 'step': 7868, 'epoch': 2} {'type': 'loss', 'content': 0.12654469907283783, 'timestamp': '2025-09-30 22:21:41.922811', 'step': 7869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:41.954993', 'step': 7869, 'epoch': 2} {'type': 'loss', 'content': 0.1269744485616684, 'timestamp': '2025-09-30 22:21:41.966942', 'step': 7870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:42.007315', 'step': 7870, 'epoch': 2} {'type': 'loss', 'content': 0.19323238730430603, 'timestamp': '2025-09-30 22:21:42.020123', 'step': 7871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:42.061112', 'step': 7871, 'epoch': 2} {'type': 'loss', 'content': 0.15096895396709442, 'timestamp': '2025-09-30 22:21:42.086025', 'step': 7872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:42.117263', 'step': 7872, 'epoch': 2} {'type': 'loss', 'content': 0.1241798847913742, 'timestamp': '2025-09-30 22:21:42.128710', 'step': 7873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:42.171121', 'step': 7873, 'epoch': 2} {'type': 'loss', 'content': 0.08725633472204208, 'timestamp': '2025-09-30 22:21:42.186192', 'step': 7874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:42.228133', 'step': 7874, 'epoch': 2} {'type': 'loss', 'content': 0.08089321851730347, 'timestamp': '2025-09-30 22:21:42.244673', 'step': 7875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:42.288360', 'step': 7875, 'epoch': 2} {'type': 'loss', 'content': 0.08076392859220505, 'timestamp': '2025-09-30 22:21:42.324259', 'step': 7876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:42.368377', 'step': 7876, 'epoch': 2} {'type': 'loss', 'content': 0.07740148901939392, 'timestamp': '2025-09-30 22:21:42.381362', 'step': 7877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:42.422923', 'step': 7877, 'epoch': 2} {'type': 'loss', 'content': 0.08428290486335754, 'timestamp': '2025-09-30 22:21:42.436621', 'step': 7878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:42.479688', 'step': 7878, 'epoch': 2} {'type': 'loss', 'content': 0.07680954039096832, 'timestamp': '2025-09-30 22:21:42.484769', 'step': 7879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:42.516867', 'step': 7879, 'epoch': 2} {'type': 'loss', 'content': 0.12047801166772842, 'timestamp': '2025-09-30 22:21:42.554826', 'step': 7880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:42.587629', 'step': 7880, 'epoch': 2} {'type': 'loss', 'content': 0.08590270578861237, 'timestamp': '2025-09-30 22:21:42.600704', 'step': 7881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:42.641846', 'step': 7881, 'epoch': 2} {'type': 'loss', 'content': 0.17685729265213013, 'timestamp': '2025-09-30 22:21:42.655514', 'step': 7882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:42.699410', 'step': 7882, 'epoch': 2} {'type': 'loss', 'content': 0.1226624846458435, 'timestamp': '2025-09-30 22:21:42.715209', 'step': 7883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:42.758541', 'step': 7883, 'epoch': 2} {'type': 'loss', 'content': 0.13164667785167694, 'timestamp': '2025-09-30 22:21:42.798462', 'step': 7884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:42.841898', 'step': 7884, 'epoch': 2} {'type': 'loss', 'content': 0.07540853321552277, 'timestamp': '2025-09-30 22:21:42.846112', 'step': 7885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:42.888084', 'step': 7885, 'epoch': 2} {'type': 'loss', 'content': 0.12753963470458984, 'timestamp': '2025-09-30 22:21:42.903557', 'step': 7886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:42.947708', 'step': 7886, 'epoch': 2} {'type': 'loss', 'content': 0.21385926008224487, 'timestamp': '2025-09-30 22:21:42.963006', 'step': 7887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:43.001107', 'step': 7887, 'epoch': 2} {'type': 'loss', 'content': 0.19580096006393433, 'timestamp': '2025-09-30 22:21:43.037512', 'step': 7888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:43.076511', 'step': 7888, 'epoch': 2} {'type': 'loss', 'content': 0.10393238067626953, 'timestamp': '2025-09-30 22:21:43.082116', 'step': 7889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.128319', 'step': 7889, 'epoch': 2} {'type': 'loss', 'content': 0.02478623576462269, 'timestamp': '2025-09-30 22:21:43.133270', 'step': 7890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:43.166006', 'step': 7890, 'epoch': 2} {'type': 'loss', 'content': 0.11397380381822586, 'timestamp': '2025-09-30 22:21:43.181131', 'step': 7891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.227269', 'step': 7891, 'epoch': 2} {'type': 'loss', 'content': 0.17866767942905426, 'timestamp': '2025-09-30 22:21:43.264449', 'step': 7892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:43.305342', 'step': 7892, 'epoch': 2} {'type': 'loss', 'content': 0.0979406014084816, 'timestamp': '2025-09-30 22:21:43.321867', 'step': 7893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:43.366378', 'step': 7893, 'epoch': 2} {'type': 'loss', 'content': 0.06406612694263458, 'timestamp': '2025-09-30 22:21:43.371556', 'step': 7894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.404272', 'step': 7894, 'epoch': 2} {'type': 'loss', 'content': 0.17407166957855225, 'timestamp': '2025-09-30 22:21:43.421546', 'step': 7895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:43.452613', 'step': 7895, 'epoch': 2} {'type': 'loss', 'content': 0.11217890679836273, 'timestamp': '2025-09-30 22:21:43.478767', 'step': 7896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:43.519545', 'step': 7896, 'epoch': 2} {'type': 'loss', 'content': 0.17031213641166687, 'timestamp': '2025-09-30 22:21:43.535601', 'step': 7897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:43.580919', 'step': 7897, 'epoch': 2} {'type': 'loss', 'content': 0.14262840151786804, 'timestamp': '2025-09-30 22:21:43.596461', 'step': 7898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:43.638932', 'step': 7898, 'epoch': 2} {'type': 'loss', 'content': 0.08033537864685059, 'timestamp': '2025-09-30 22:21:43.655347', 'step': 7899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:43.697933', 'step': 7899, 'epoch': 2} {'type': 'loss', 'content': 0.19231493771076202, 'timestamp': '2025-09-30 22:21:43.734890', 'step': 7900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:43.779385', 'step': 7900, 'epoch': 2} {'type': 'loss', 'content': 0.12116323411464691, 'timestamp': '2025-09-30 22:21:43.783737', 'step': 7901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.825922', 'step': 7901, 'epoch': 2} {'type': 'loss', 'content': 0.09371780604124069, 'timestamp': '2025-09-30 22:21:43.840739', 'step': 7902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.881827', 'step': 7902, 'epoch': 2} {'type': 'loss', 'content': 0.14961698651313782, 'timestamp': '2025-09-30 22:21:43.886231', 'step': 7903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.928375', 'step': 7903, 'epoch': 2} {'type': 'loss', 'content': 0.17763668298721313, 'timestamp': '2025-09-30 22:21:43.953638', 'step': 7904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:43.984382', 'step': 7904, 'epoch': 2} {'type': 'loss', 'content': 0.15323160588741302, 'timestamp': '2025-09-30 22:21:43.988899', 'step': 7905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:44.020230', 'step': 7905, 'epoch': 2} {'type': 'loss', 'content': 0.14503414928913116, 'timestamp': '2025-09-30 22:21:44.037133', 'step': 7906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:44.080061', 'step': 7906, 'epoch': 2} {'type': 'loss', 'content': 0.16397681832313538, 'timestamp': '2025-09-30 22:21:44.083892', 'step': 7907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:44.115322', 'step': 7907, 'epoch': 2} {'type': 'loss', 'content': 0.12063062191009521, 'timestamp': '2025-09-30 22:21:44.153498', 'step': 7908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:44.184496', 'step': 7908, 'epoch': 2} {'type': 'loss', 'content': 0.14061108231544495, 'timestamp': '2025-09-30 22:21:44.188038', 'step': 7909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:44.233359', 'step': 7909, 'epoch': 2} {'type': 'loss', 'content': 0.10582992434501648, 'timestamp': '2025-09-30 22:21:44.237239', 'step': 7910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:44.269838', 'step': 7910, 'epoch': 2} {'type': 'loss', 'content': 0.11786631494760513, 'timestamp': '2025-09-30 22:21:44.273711', 'step': 7911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:44.318650', 'step': 7911, 'epoch': 2} {'type': 'loss', 'content': 0.10710892826318741, 'timestamp': '2025-09-30 22:21:44.355451', 'step': 7912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:44.386902', 'step': 7912, 'epoch': 2} {'type': 'loss', 'content': 0.13984975218772888, 'timestamp': '2025-09-30 22:21:44.405086', 'step': 7913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:44.450112', 'step': 7913, 'epoch': 2} {'type': 'loss', 'content': 0.1633201241493225, 'timestamp': '2025-09-30 22:21:44.475963', 'step': 7914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:44.518568', 'step': 7914, 'epoch': 2} {'type': 'loss', 'content': 0.13803912699222565, 'timestamp': '2025-09-30 22:21:44.530283', 'step': 7915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:44.574567', 'step': 7915, 'epoch': 2} {'type': 'loss', 'content': 0.08746176958084106, 'timestamp': '2025-09-30 22:21:44.602616', 'step': 7916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:44.648491', 'step': 7916, 'epoch': 2} {'type': 'loss', 'content': 0.14441902935504913, 'timestamp': '2025-09-30 22:21:44.661096', 'step': 7917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:44.691621', 'step': 7917, 'epoch': 2} {'type': 'loss', 'content': 0.09273235499858856, 'timestamp': '2025-09-30 22:21:44.708293', 'step': 7918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:44.753260', 'step': 7918, 'epoch': 2} {'type': 'loss', 'content': 0.1201053336262703, 'timestamp': '2025-09-30 22:21:44.756583', 'step': 7919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:44.788873', 'step': 7919, 'epoch': 2} {'type': 'loss', 'content': 0.18042472004890442, 'timestamp': '2025-09-30 22:21:44.813655', 'step': 7920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:44.844687', 'step': 7920, 'epoch': 2} {'type': 'loss', 'content': 0.11291519552469254, 'timestamp': '2025-09-30 22:21:44.849111', 'step': 7921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:44.882095', 'step': 7921, 'epoch': 2} {'type': 'loss', 'content': 0.1597137749195099, 'timestamp': '2025-09-30 22:21:44.887057', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:21:52.708256', 'step': 7922, 'epoch': 2} {'type': 'pplx', 'content': 10705.862091617357, 'timestamp': '2025-09-30 22:21:52.717715', 'step': 7922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:52.753877', 'step': 7922, 'epoch': 2} {'type': 'loss', 'content': 0.05603892356157303, 'timestamp': '2025-09-30 22:21:52.766342', 'step': 7923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:52.806318', 'step': 7923, 'epoch': 2} {'type': 'loss', 'content': 0.14893756806850433, 'timestamp': '2025-09-30 22:21:52.836578', 'step': 7924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:52.873159', 'step': 7924, 'epoch': 2} {'type': 'loss', 'content': 0.09168671816587448, 'timestamp': '2025-09-30 22:21:52.881120', 'step': 7925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:52.916666', 'step': 7925, 'epoch': 2} {'type': 'loss', 'content': 0.08629955351352692, 'timestamp': '2025-09-30 22:21:52.920111', 'step': 7926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:52.956383', 'step': 7926, 'epoch': 2} {'type': 'loss', 'content': 0.10365032404661179, 'timestamp': '2025-09-30 22:21:52.964665', 'step': 7927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.001103', 'step': 7927, 'epoch': 2} {'type': 'loss', 'content': 0.13823819160461426, 'timestamp': '2025-09-30 22:21:53.031594', 'step': 7928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.068801', 'step': 7928, 'epoch': 2} {'type': 'loss', 'content': 0.1482132375240326, 'timestamp': '2025-09-30 22:21:53.078401', 'step': 7929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.114732', 'step': 7929, 'epoch': 2} {'type': 'loss', 'content': 0.06875878572463989, 'timestamp': '2025-09-30 22:21:53.122855', 'step': 7930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:53.157450', 'step': 7930, 'epoch': 2} {'type': 'loss', 'content': 0.1845446676015854, 'timestamp': '2025-09-30 22:21:53.161386', 'step': 7931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.195681', 'step': 7931, 'epoch': 2} {'type': 'loss', 'content': 0.12925411760807037, 'timestamp': '2025-09-30 22:21:53.221014', 'step': 7932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:53.253613', 'step': 7932, 'epoch': 2} {'type': 'loss', 'content': 0.1037498340010643, 'timestamp': '2025-09-30 22:21:53.256672', 'step': 7933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:53.287467', 'step': 7933, 'epoch': 2} {'type': 'loss', 'content': 0.060826390981674194, 'timestamp': '2025-09-30 22:21:53.296031', 'step': 7934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:53.334431', 'step': 7934, 'epoch': 2} {'type': 'loss', 'content': 0.21458421647548676, 'timestamp': '2025-09-30 22:21:53.344106', 'step': 7935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.381196', 'step': 7935, 'epoch': 2} {'type': 'loss', 'content': 0.05479556694626808, 'timestamp': '2025-09-30 22:21:53.408723', 'step': 7936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.442496', 'step': 7936, 'epoch': 2} {'type': 'loss', 'content': 0.13462397456169128, 'timestamp': '2025-09-30 22:21:53.447704', 'step': 7937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:53.481691', 'step': 7937, 'epoch': 2} {'type': 'loss', 'content': 0.09315597265958786, 'timestamp': '2025-09-30 22:21:53.491541', 'step': 7938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:53.522965', 'step': 7938, 'epoch': 2} {'type': 'loss', 'content': 0.1172705665230751, 'timestamp': '2025-09-30 22:21:53.526915', 'step': 7939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:53.558764', 'step': 7939, 'epoch': 2} {'type': 'loss', 'content': 0.25810927152633667, 'timestamp': '2025-09-30 22:21:53.588479', 'step': 7940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:21:53.619509', 'step': 7940, 'epoch': 2} {'type': 'loss', 'content': 0.13880655169487, 'timestamp': '2025-09-30 22:21:53.627002', 'step': 7941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:53.658524', 'step': 7941, 'epoch': 2} {'type': 'loss', 'content': 0.12288665771484375, 'timestamp': '2025-09-30 22:21:53.665715', 'step': 7942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:53.701177', 'step': 7942, 'epoch': 2} {'type': 'loss', 'content': 0.11063653975725174, 'timestamp': '2025-09-30 22:21:53.710485', 'step': 7943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:53.746570', 'step': 7943, 'epoch': 2} {'type': 'loss', 'content': 0.16828982532024384, 'timestamp': '2025-09-30 22:21:53.775529', 'step': 7944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:53.807471', 'step': 7944, 'epoch': 2} {'type': 'loss', 'content': 0.0801420509815216, 'timestamp': '2025-09-30 22:21:53.819425', 'step': 7945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:53.857541', 'step': 7945, 'epoch': 2} {'type': 'loss', 'content': 0.0931919738650322, 'timestamp': '2025-09-30 22:21:53.871291', 'step': 7946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:53.912720', 'step': 7946, 'epoch': 2} {'type': 'loss', 'content': 0.03431796282529831, 'timestamp': '2025-09-30 22:21:53.926807', 'step': 7947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:53.968696', 'step': 7947, 'epoch': 2} {'type': 'loss', 'content': 0.16258201003074646, 'timestamp': '2025-09-30 22:21:54.001805', 'step': 7948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:54.043135', 'step': 7948, 'epoch': 2} {'type': 'loss', 'content': 0.13995693624019623, 'timestamp': '2025-09-30 22:21:54.054570', 'step': 7949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.086313', 'step': 7949, 'epoch': 2} {'type': 'loss', 'content': 0.14556968212127686, 'timestamp': '2025-09-30 22:21:54.097288', 'step': 7950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:54.135572', 'step': 7950, 'epoch': 2} {'type': 'loss', 'content': 0.18011555075645447, 'timestamp': '2025-09-30 22:21:54.148094', 'step': 7951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.190548', 'step': 7951, 'epoch': 2} {'type': 'loss', 'content': 0.08217232674360275, 'timestamp': '2025-09-30 22:21:54.221871', 'step': 7952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.262043', 'step': 7952, 'epoch': 2} {'type': 'loss', 'content': 0.09973809868097305, 'timestamp': '2025-09-30 22:21:54.271805', 'step': 7953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:54.309629', 'step': 7953, 'epoch': 2} {'type': 'loss', 'content': 0.14318497478961945, 'timestamp': '2025-09-30 22:21:54.320500', 'step': 7954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.357455', 'step': 7954, 'epoch': 2} {'type': 'loss', 'content': 0.16017736494541168, 'timestamp': '2025-09-30 22:21:54.361344', 'step': 7955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:54.398301', 'step': 7955, 'epoch': 2} {'type': 'loss', 'content': 0.1936902552843094, 'timestamp': '2025-09-30 22:21:54.424056', 'step': 7956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:21:54.462458', 'step': 7956, 'epoch': 2} {'type': 'loss', 'content': 0.1285051852464676, 'timestamp': '2025-09-30 22:21:54.473640', 'step': 7957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:54.511250', 'step': 7957, 'epoch': 2} {'type': 'loss', 'content': 0.1317782998085022, 'timestamp': '2025-09-30 22:21:54.519643', 'step': 7958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.556061', 'step': 7958, 'epoch': 2} {'type': 'loss', 'content': 0.04694928601384163, 'timestamp': '2025-09-30 22:21:54.563385', 'step': 7959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:54.604556', 'step': 7959, 'epoch': 2} {'type': 'loss', 'content': 0.06504189968109131, 'timestamp': '2025-09-30 22:21:54.632585', 'step': 7960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:54.670189', 'step': 7960, 'epoch': 2} {'type': 'loss', 'content': 0.05578947812318802, 'timestamp': '2025-09-30 22:21:54.678990', 'step': 7961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:54.714905', 'step': 7961, 'epoch': 2} {'type': 'loss', 'content': 0.17307278513908386, 'timestamp': '2025-09-30 22:21:54.724670', 'step': 7962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:54.759688', 'step': 7962, 'epoch': 2} {'type': 'loss', 'content': 0.09627131372690201, 'timestamp': '2025-09-30 22:21:54.767755', 'step': 7963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.802839', 'step': 7963, 'epoch': 2} {'type': 'loss', 'content': 0.07150454819202423, 'timestamp': '2025-09-30 22:21:54.833965', 'step': 7964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:54.872099', 'step': 7964, 'epoch': 2} {'type': 'loss', 'content': 0.07957340031862259, 'timestamp': '2025-09-30 22:21:54.884691', 'step': 7965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:54.925310', 'step': 7965, 'epoch': 2} {'type': 'loss', 'content': 0.1442108452320099, 'timestamp': '2025-09-30 22:21:54.933896', 'step': 7966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:54.971854', 'step': 7966, 'epoch': 2} {'type': 'loss', 'content': 0.17682978510856628, 'timestamp': '2025-09-30 22:21:54.981161', 'step': 7967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.019325', 'step': 7967, 'epoch': 2} {'type': 'loss', 'content': 0.07809494435787201, 'timestamp': '2025-09-30 22:21:55.048898', 'step': 7968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:55.085591', 'step': 7968, 'epoch': 2} {'type': 'loss', 'content': 0.1271391063928604, 'timestamp': '2025-09-30 22:21:55.094283', 'step': 7969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.132929', 'step': 7969, 'epoch': 2} {'type': 'loss', 'content': 0.07402998208999634, 'timestamp': '2025-09-30 22:21:55.139159', 'step': 7970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:55.172552', 'step': 7970, 'epoch': 2} {'type': 'loss', 'content': 0.06528926640748978, 'timestamp': '2025-09-30 22:21:55.180341', 'step': 7971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.217627', 'step': 7971, 'epoch': 2} {'type': 'loss', 'content': 0.08225250244140625, 'timestamp': '2025-09-30 22:21:55.249150', 'step': 7972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.288407', 'step': 7972, 'epoch': 2} {'type': 'loss', 'content': 0.20148058235645294, 'timestamp': '2025-09-30 22:21:55.297397', 'step': 7973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:55.335057', 'step': 7973, 'epoch': 2} {'type': 'loss', 'content': 0.12634040415287018, 'timestamp': '2025-09-30 22:21:55.345868', 'step': 7974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:55.383758', 'step': 7974, 'epoch': 2} {'type': 'loss', 'content': 0.13570746779441833, 'timestamp': '2025-09-30 22:21:55.393432', 'step': 7975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:55.432742', 'step': 7975, 'epoch': 2} {'type': 'loss', 'content': 0.10397876799106598, 'timestamp': '2025-09-30 22:21:55.466147', 'step': 7976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.506737', 'step': 7976, 'epoch': 2} {'type': 'loss', 'content': 0.14877323806285858, 'timestamp': '2025-09-30 22:21:55.515857', 'step': 7977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.548619', 'step': 7977, 'epoch': 2} {'type': 'loss', 'content': 0.08085931837558746, 'timestamp': '2025-09-30 22:21:55.559140', 'step': 7978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:55.595492', 'step': 7978, 'epoch': 2} {'type': 'loss', 'content': 0.13703124225139618, 'timestamp': '2025-09-30 22:21:55.603844', 'step': 7979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:55.642449', 'step': 7979, 'epoch': 2} {'type': 'loss', 'content': 0.0789952278137207, 'timestamp': '2025-09-30 22:21:55.668229', 'step': 7980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:21:55.701013', 'step': 7980, 'epoch': 2} {'type': 'loss', 'content': 0.07697327435016632, 'timestamp': '2025-09-30 22:21:55.712861', 'step': 7981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:55.753482', 'step': 7981, 'epoch': 2} {'type': 'loss', 'content': 0.25289618968963623, 'timestamp': '2025-09-30 22:21:55.757265', 'step': 7982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:55.791723', 'step': 7982, 'epoch': 2} {'type': 'loss', 'content': 0.10806722193956375, 'timestamp': '2025-09-30 22:21:55.803394', 'step': 7983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.842654', 'step': 7983, 'epoch': 2} {'type': 'loss', 'content': 0.04880259931087494, 'timestamp': '2025-09-30 22:21:55.870905', 'step': 7984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.902088', 'step': 7984, 'epoch': 2} {'type': 'loss', 'content': 0.06008286029100418, 'timestamp': '2025-09-30 22:21:55.905591', 'step': 7985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:55.936895', 'step': 7985, 'epoch': 2} {'type': 'loss', 'content': 0.16524098813533783, 'timestamp': '2025-09-30 22:21:55.940691', 'step': 7986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:55.972772', 'step': 7986, 'epoch': 2} {'type': 'loss', 'content': 0.13504180312156677, 'timestamp': '2025-09-30 22:21:55.975391', 'step': 7987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:21:56.013730', 'step': 7987, 'epoch': 2} {'type': 'loss', 'content': 0.18829543888568878, 'timestamp': '2025-09-30 22:21:56.045062', 'step': 7988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:56.075768', 'step': 7988, 'epoch': 2} {'type': 'loss', 'content': 0.1145784929394722, 'timestamp': '2025-09-30 22:21:56.082687', 'step': 7989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:56.118650', 'step': 7989, 'epoch': 2} {'type': 'loss', 'content': 0.14308978617191315, 'timestamp': '2025-09-30 22:21:56.124629', 'step': 7990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:21:56.160559', 'step': 7990, 'epoch': 2} {'type': 'loss', 'content': 0.10112740844488144, 'timestamp': '2025-09-30 22:21:56.166132', 'step': 7991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:56.201321', 'step': 7991, 'epoch': 2} {'type': 'loss', 'content': 0.06487017124891281, 'timestamp': '2025-09-30 22:21:56.228056', 'step': 7992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:21:56.263185', 'step': 7992, 'epoch': 2} {'type': 'loss', 'content': 0.1286533772945404, 'timestamp': '2025-09-30 22:21:56.269573', 'step': 7993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:21:56.303733', 'step': 7993, 'epoch': 2} {'type': 'loss', 'content': 0.10126849263906479, 'timestamp': '2025-09-30 22:21:56.313142', 'step': 7994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:56.347690', 'step': 7994, 'epoch': 2} {'type': 'loss', 'content': 0.18779641389846802, 'timestamp': '2025-09-30 22:21:56.354119', 'step': 7995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:21:56.390704', 'step': 7995, 'epoch': 2} {'type': 'loss', 'content': 0.18592017889022827, 'timestamp': '2025-09-30 22:21:56.418601', 'step': 7996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:56.451078', 'step': 7996, 'epoch': 2} {'type': 'loss', 'content': 0.08127663284540176, 'timestamp': '2025-09-30 22:21:56.456387', 'step': 7997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:56.489503', 'step': 7997, 'epoch': 2} {'type': 'loss', 'content': 0.0949389711022377, 'timestamp': '2025-09-30 22:21:56.499320', 'step': 7998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:21:56.536447', 'step': 7998, 'epoch': 2} {'type': 'loss', 'content': 0.20741085708141327, 'timestamp': '2025-09-30 22:21:56.545009', 'step': 7999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:21:56.582157', 'step': 7999, 'epoch': 2} {'type': 'loss', 'content': 0.15286873281002045, 'timestamp': '2025-09-30 22:21:56.612716', 'step': 8000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8000', 'timestamp': '2025-09-30 22:22:02.241531', 'step': 8000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.294084', 'step': 8000, 'epoch': 2} {'type': 'loss', 'content': 0.1366649568080902, 'timestamp': '2025-09-30 22:22:02.300619', 'step': 8001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.336555', 'step': 8001, 'epoch': 2} {'type': 'loss', 'content': 0.11067042499780655, 'timestamp': '2025-09-30 22:22:02.340513', 'step': 8002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.373452', 'step': 8002, 'epoch': 2} {'type': 'loss', 'content': 0.08112529665231705, 'timestamp': '2025-09-30 22:22:02.381839', 'step': 8003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:02.418256', 'step': 8003, 'epoch': 2} {'type': 'loss', 'content': 0.08064671605825424, 'timestamp': '2025-09-30 22:22:02.446836', 'step': 8004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:02.482238', 'step': 8004, 'epoch': 2} {'type': 'loss', 'content': 0.1206565871834755, 'timestamp': '2025-09-30 22:22:02.484753', 'step': 8005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.520098', 'step': 8005, 'epoch': 2} {'type': 'loss', 'content': 0.14684943854808807, 'timestamp': '2025-09-30 22:22:02.525748', 'step': 8006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:02.559432', 'step': 8006, 'epoch': 2} {'type': 'loss', 'content': 0.11402953416109085, 'timestamp': '2025-09-30 22:22:02.563926', 'step': 8007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:02.597603', 'step': 8007, 'epoch': 2} {'type': 'loss', 'content': 0.15769857168197632, 'timestamp': '2025-09-30 22:22:02.624892', 'step': 8008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:02.658634', 'step': 8008, 'epoch': 2} {'type': 'loss', 'content': 0.1402716040611267, 'timestamp': '2025-09-30 22:22:02.663288', 'step': 8009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.697166', 'step': 8009, 'epoch': 2} {'type': 'loss', 'content': 0.06958486884832382, 'timestamp': '2025-09-30 22:22:02.701402', 'step': 8010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.733749', 'step': 8010, 'epoch': 2} {'type': 'loss', 'content': 0.07470950484275818, 'timestamp': '2025-09-30 22:22:02.738110', 'step': 8011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.770021', 'step': 8011, 'epoch': 2} {'type': 'loss', 'content': 0.1688547432422638, 'timestamp': '2025-09-30 22:22:02.793660', 'step': 8012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:02.826129', 'step': 8012, 'epoch': 2} {'type': 'loss', 'content': 0.14247184991836548, 'timestamp': '2025-09-30 22:22:02.834169', 'step': 8013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:02.869839', 'step': 8013, 'epoch': 2} {'type': 'loss', 'content': 0.11874455958604813, 'timestamp': '2025-09-30 22:22:02.879920', 'step': 8014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.916635', 'step': 8014, 'epoch': 2} {'type': 'loss', 'content': 0.11056623607873917, 'timestamp': '2025-09-30 22:22:02.926166', 'step': 8015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:02.963619', 'step': 8015, 'epoch': 2} {'type': 'loss', 'content': 0.11992909759283066, 'timestamp': '2025-09-30 22:22:02.992038', 'step': 8016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.025597', 'step': 8016, 'epoch': 2} {'type': 'loss', 'content': 0.06708011031150818, 'timestamp': '2025-09-30 22:22:03.030066', 'step': 8017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:03.062710', 'step': 8017, 'epoch': 2} {'type': 'loss', 'content': 0.06326974928379059, 'timestamp': '2025-09-30 22:22:03.068401', 'step': 8018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.102535', 'step': 8018, 'epoch': 2} {'type': 'loss', 'content': 0.09772739559412003, 'timestamp': '2025-09-30 22:22:03.109216', 'step': 8019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:03.143072', 'step': 8019, 'epoch': 2} {'type': 'loss', 'content': 0.13422758877277374, 'timestamp': '2025-09-30 22:22:03.170831', 'step': 8020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.204955', 'step': 8020, 'epoch': 2} {'type': 'loss', 'content': 0.18422815203666687, 'timestamp': '2025-09-30 22:22:03.208345', 'step': 8021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:03.240060', 'step': 8021, 'epoch': 2} {'type': 'loss', 'content': 0.05720411241054535, 'timestamp': '2025-09-30 22:22:03.243347', 'step': 8022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:03.278945', 'step': 8022, 'epoch': 2} {'type': 'loss', 'content': 0.10653981566429138, 'timestamp': '2025-09-30 22:22:03.285540', 'step': 8023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:03.320896', 'step': 8023, 'epoch': 2} {'type': 'loss', 'content': 0.20937244594097137, 'timestamp': '2025-09-30 22:22:03.347432', 'step': 8024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:22:03.380639', 'step': 8024, 'epoch': 2} {'type': 'loss', 'content': 0.16918261349201202, 'timestamp': '2025-09-30 22:22:03.383552', 'step': 8025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.413179', 'step': 8025, 'epoch': 2} {'type': 'loss', 'content': 0.13948820531368256, 'timestamp': '2025-09-30 22:22:03.417440', 'step': 8026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:03.449590', 'step': 8026, 'epoch': 2} {'type': 'loss', 'content': 0.16901442408561707, 'timestamp': '2025-09-30 22:22:03.454678', 'step': 8027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:03.486603', 'step': 8027, 'epoch': 2} {'type': 'loss', 'content': 0.09105818718671799, 'timestamp': '2025-09-30 22:22:03.518167', 'step': 8028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.555655', 'step': 8028, 'epoch': 2} {'type': 'loss', 'content': 0.11968745291233063, 'timestamp': '2025-09-30 22:22:03.565527', 'step': 8029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.603594', 'step': 8029, 'epoch': 2} {'type': 'loss', 'content': 0.12940742075443268, 'timestamp': '2025-09-30 22:22:03.612316', 'step': 8030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:03.650040', 'step': 8030, 'epoch': 2} {'type': 'loss', 'content': 0.13499268889427185, 'timestamp': '2025-09-30 22:22:03.654956', 'step': 8031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.688057', 'step': 8031, 'epoch': 2} {'type': 'loss', 'content': 0.07857279479503632, 'timestamp': '2025-09-30 22:22:03.715380', 'step': 8032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.745568', 'step': 8032, 'epoch': 2} {'type': 'loss', 'content': 0.06052974984049797, 'timestamp': '2025-09-30 22:22:03.750749', 'step': 8033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:03.783842', 'step': 8033, 'epoch': 2} {'type': 'loss', 'content': 0.2843032479286194, 'timestamp': '2025-09-30 22:22:03.791131', 'step': 8034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.826271', 'step': 8034, 'epoch': 2} {'type': 'loss', 'content': 0.06354702264070511, 'timestamp': '2025-09-30 22:22:03.830700', 'step': 8035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:03.864806', 'step': 8035, 'epoch': 2} {'type': 'loss', 'content': 0.10358510911464691, 'timestamp': '2025-09-30 22:22:03.891885', 'step': 8036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:03.925625', 'step': 8036, 'epoch': 2} {'type': 'loss', 'content': 0.060753218829631805, 'timestamp': '2025-09-30 22:22:03.930631', 'step': 8037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:03.964936', 'step': 8037, 'epoch': 2} {'type': 'loss', 'content': 0.22687748074531555, 'timestamp': '2025-09-30 22:22:03.967337', 'step': 8038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:03.999751', 'step': 8038, 'epoch': 2} {'type': 'loss', 'content': 0.1962331235408783, 'timestamp': '2025-09-30 22:22:04.003511', 'step': 8039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:04.036863', 'step': 8039, 'epoch': 2} {'type': 'loss', 'content': 0.14046072959899902, 'timestamp': '2025-09-30 22:22:04.062998', 'step': 8040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:04.102478', 'step': 8040, 'epoch': 2} {'type': 'loss', 'content': 0.08443284034729004, 'timestamp': '2025-09-30 22:22:04.106866', 'step': 8041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:04.139238', 'step': 8041, 'epoch': 2} {'type': 'loss', 'content': 0.07392945140600204, 'timestamp': '2025-09-30 22:22:04.144597', 'step': 8042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:04.178413', 'step': 8042, 'epoch': 2} {'type': 'loss', 'content': 0.09788090735673904, 'timestamp': '2025-09-30 22:22:04.183420', 'step': 8043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:04.218217', 'step': 8043, 'epoch': 2} {'type': 'loss', 'content': 0.19905759394168854, 'timestamp': '2025-09-30 22:22:04.245800', 'step': 8044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:04.280288', 'step': 8044, 'epoch': 2} {'type': 'loss', 'content': 0.15613065659999847, 'timestamp': '2025-09-30 22:22:04.285643', 'step': 8045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:22:04.317173', 'step': 8045, 'epoch': 2} {'type': 'loss', 'content': 0.09280557185411453, 'timestamp': '2025-09-30 22:22:04.327459', 'step': 8046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:04.365958', 'step': 8046, 'epoch': 2} {'type': 'loss', 'content': 0.11149319261312485, 'timestamp': '2025-09-30 22:22:04.375277', 'step': 8047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:04.413007', 'step': 8047, 'epoch': 2} {'type': 'loss', 'content': 0.12995071709156036, 'timestamp': '2025-09-30 22:22:04.444191', 'step': 8048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:04.482621', 'step': 8048, 'epoch': 2} {'type': 'loss', 'content': 0.2044021189212799, 'timestamp': '2025-09-30 22:22:04.491824', 'step': 8049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:04.528647', 'step': 8049, 'epoch': 2} {'type': 'loss', 'content': 0.11755862832069397, 'timestamp': '2025-09-30 22:22:04.533941', 'step': 8050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:04.568446', 'step': 8050, 'epoch': 2} {'type': 'loss', 'content': 0.12177351117134094, 'timestamp': '2025-09-30 22:22:04.575962', 'step': 8051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:22:04.610536', 'step': 8051, 'epoch': 2} {'type': 'loss', 'content': 0.13766732811927795, 'timestamp': '2025-09-30 22:22:04.639196', 'step': 8052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:04.674643', 'step': 8052, 'epoch': 2} {'type': 'loss', 'content': 0.14526458084583282, 'timestamp': '2025-09-30 22:22:04.679354', 'step': 8053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:04.714850', 'step': 8053, 'epoch': 2} {'type': 'loss', 'content': 0.08640066534280777, 'timestamp': '2025-09-30 22:22:04.719311', 'step': 8054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:04.752506', 'step': 8054, 'epoch': 2} {'type': 'loss', 'content': 0.12588241696357727, 'timestamp': '2025-09-30 22:22:04.756527', 'step': 8055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:04.789661', 'step': 8055, 'epoch': 2} {'type': 'loss', 'content': 0.23863959312438965, 'timestamp': '2025-09-30 22:22:04.816111', 'step': 8056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:04.851209', 'step': 8056, 'epoch': 2} {'type': 'loss', 'content': 0.13071541488170624, 'timestamp': '2025-09-30 22:22:04.854067', 'step': 8057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:04.890334', 'step': 8057, 'epoch': 2} {'type': 'loss', 'content': 0.1495949923992157, 'timestamp': '2025-09-30 22:22:04.898633', 'step': 8058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:04.935227', 'step': 8058, 'epoch': 2} {'type': 'loss', 'content': 0.1310892403125763, 'timestamp': '2025-09-30 22:22:04.940332', 'step': 8059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:04.973743', 'step': 8059, 'epoch': 2} {'type': 'loss', 'content': 0.12316912412643433, 'timestamp': '2025-09-30 22:22:05.000794', 'step': 8060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.031463', 'step': 8060, 'epoch': 2} {'type': 'loss', 'content': 0.0924425795674324, 'timestamp': '2025-09-30 22:22:05.035725', 'step': 8061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:05.068257', 'step': 8061, 'epoch': 2} {'type': 'loss', 'content': 0.23843245208263397, 'timestamp': '2025-09-30 22:22:05.071029', 'step': 8062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:05.104319', 'step': 8062, 'epoch': 2} {'type': 'loss', 'content': 0.14266756176948547, 'timestamp': '2025-09-30 22:22:05.108601', 'step': 8063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.141022', 'step': 8063, 'epoch': 2} {'type': 'loss', 'content': 0.13232670724391937, 'timestamp': '2025-09-30 22:22:05.164942', 'step': 8064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:05.195950', 'step': 8064, 'epoch': 2} {'type': 'loss', 'content': 0.13683488965034485, 'timestamp': '2025-09-30 22:22:05.198779', 'step': 8065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.230832', 'step': 8065, 'epoch': 2} {'type': 'loss', 'content': 0.07929937541484833, 'timestamp': '2025-09-30 22:22:05.234616', 'step': 8066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.267025', 'step': 8066, 'epoch': 2} {'type': 'loss', 'content': 0.06712378561496735, 'timestamp': '2025-09-30 22:22:05.271356', 'step': 8067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:05.302814', 'step': 8067, 'epoch': 2} {'type': 'loss', 'content': 0.15138636529445648, 'timestamp': '2025-09-30 22:22:05.328974', 'step': 8068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:05.359108', 'step': 8068, 'epoch': 2} {'type': 'loss', 'content': 0.031087055802345276, 'timestamp': '2025-09-30 22:22:05.371787', 'step': 8069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.414272', 'step': 8069, 'epoch': 2} {'type': 'loss', 'content': 0.14017733931541443, 'timestamp': '2025-09-30 22:22:05.427560', 'step': 8070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.469654', 'step': 8070, 'epoch': 2} {'type': 'loss', 'content': 0.16603830456733704, 'timestamp': '2025-09-30 22:22:05.474408', 'step': 8071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.514010', 'step': 8071, 'epoch': 2} {'type': 'loss', 'content': 0.15645909309387207, 'timestamp': '2025-09-30 22:22:05.539102', 'step': 8072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.572774', 'step': 8072, 'epoch': 2} {'type': 'loss', 'content': 0.08628290146589279, 'timestamp': '2025-09-30 22:22:05.579123', 'step': 8073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.614762', 'step': 8073, 'epoch': 2} {'type': 'loss', 'content': 0.08689481019973755, 'timestamp': '2025-09-30 22:22:05.621267', 'step': 8074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.657238', 'step': 8074, 'epoch': 2} {'type': 'loss', 'content': 0.19479696452617645, 'timestamp': '2025-09-30 22:22:05.664601', 'step': 8075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:05.698225', 'step': 8075, 'epoch': 2} {'type': 'loss', 'content': 0.1466999351978302, 'timestamp': '2025-09-30 22:22:05.727131', 'step': 8076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.757642', 'step': 8076, 'epoch': 2} {'type': 'loss', 'content': 0.15899576246738434, 'timestamp': '2025-09-30 22:22:05.764672', 'step': 8077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:05.796396', 'step': 8077, 'epoch': 2} {'type': 'loss', 'content': 0.18810853362083435, 'timestamp': '2025-09-30 22:22:05.803696', 'step': 8078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.840802', 'step': 8078, 'epoch': 2} {'type': 'loss', 'content': 0.1449832171201706, 'timestamp': '2025-09-30 22:22:05.844877', 'step': 8079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:05.880693', 'step': 8079, 'epoch': 2} {'type': 'loss', 'content': 0.18426305055618286, 'timestamp': '2025-09-30 22:22:05.905432', 'step': 8080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:05.938717', 'step': 8080, 'epoch': 2} {'type': 'loss', 'content': 0.130808025598526, 'timestamp': '2025-09-30 22:22:05.941724', 'step': 8081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:05.976489', 'step': 8081, 'epoch': 2} {'type': 'loss', 'content': 0.16933806240558624, 'timestamp': '2025-09-30 22:22:05.979540', 'step': 8082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.014361', 'step': 8082, 'epoch': 2} {'type': 'loss', 'content': 0.1901673972606659, 'timestamp': '2025-09-30 22:22:06.017168', 'step': 8083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:06.052715', 'step': 8083, 'epoch': 2} {'type': 'loss', 'content': 0.1429893672466278, 'timestamp': '2025-09-30 22:22:06.078965', 'step': 8084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.129355', 'step': 8084, 'epoch': 2} {'type': 'loss', 'content': 0.10896099358797073, 'timestamp': '2025-09-30 22:22:06.132886', 'step': 8085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:06.180938', 'step': 8085, 'epoch': 2} {'type': 'loss', 'content': 0.1937774419784546, 'timestamp': '2025-09-30 22:22:06.185321', 'step': 8086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.226050', 'step': 8086, 'epoch': 2} {'type': 'loss', 'content': 0.22593411803245544, 'timestamp': '2025-09-30 22:22:06.243162', 'step': 8087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:06.278877', 'step': 8087, 'epoch': 2} {'type': 'loss', 'content': 0.10766786336898804, 'timestamp': '2025-09-30 22:22:06.314688', 'step': 8088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:06.370624', 'step': 8088, 'epoch': 2} {'type': 'loss', 'content': 0.15668290853500366, 'timestamp': '2025-09-30 22:22:06.375046', 'step': 8089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:06.406583', 'step': 8089, 'epoch': 2} {'type': 'loss', 'content': 0.052767105400562286, 'timestamp': '2025-09-30 22:22:06.410621', 'step': 8090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.451239', 'step': 8090, 'epoch': 2} {'type': 'loss', 'content': 0.11569872498512268, 'timestamp': '2025-09-30 22:22:06.456906', 'step': 8091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.495083', 'step': 8091, 'epoch': 2} {'type': 'loss', 'content': 0.10891062021255493, 'timestamp': '2025-09-30 22:22:06.520190', 'step': 8092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:06.562291', 'step': 8092, 'epoch': 2} {'type': 'loss', 'content': 0.14350193738937378, 'timestamp': '2025-09-30 22:22:06.564845', 'step': 8093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:06.613788', 'step': 8093, 'epoch': 2} {'type': 'loss', 'content': 0.15309292078018188, 'timestamp': '2025-09-30 22:22:06.617720', 'step': 8094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:06.665218', 'step': 8094, 'epoch': 2} {'type': 'loss', 'content': 0.1045939028263092, 'timestamp': '2025-09-30 22:22:06.670602', 'step': 8095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:06.705854', 'step': 8095, 'epoch': 2} {'type': 'loss', 'content': 0.11992378532886505, 'timestamp': '2025-09-30 22:22:06.731224', 'step': 8096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:06.767379', 'step': 8096, 'epoch': 2} {'type': 'loss', 'content': 0.10441102087497711, 'timestamp': '2025-09-30 22:22:06.775452', 'step': 8097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:06.831961', 'step': 8097, 'epoch': 2} {'type': 'loss', 'content': 0.12442134320735931, 'timestamp': '2025-09-30 22:22:06.835542', 'step': 8098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.867874', 'step': 8098, 'epoch': 2} {'type': 'loss', 'content': 0.16476847231388092, 'timestamp': '2025-09-30 22:22:06.871887', 'step': 8099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:06.906963', 'step': 8099, 'epoch': 2} {'type': 'loss', 'content': 0.08507976680994034, 'timestamp': '2025-09-30 22:22:06.931680', 'step': 8100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:06.971738', 'step': 8100, 'epoch': 2} {'type': 'loss', 'content': 0.07586770504713058, 'timestamp': '2025-09-30 22:22:06.978698', 'step': 8101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.015099', 'step': 8101, 'epoch': 2} {'type': 'loss', 'content': 0.06646287441253662, 'timestamp': '2025-09-30 22:22:07.020414', 'step': 8102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.056392', 'step': 8102, 'epoch': 2} {'type': 'loss', 'content': 0.13578929007053375, 'timestamp': '2025-09-30 22:22:07.060045', 'step': 8103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.104797', 'step': 8103, 'epoch': 2} {'type': 'loss', 'content': 0.1263684630393982, 'timestamp': '2025-09-30 22:22:07.131081', 'step': 8104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:07.177562', 'step': 8104, 'epoch': 2} {'type': 'loss', 'content': 0.14654502272605896, 'timestamp': '2025-09-30 22:22:07.189008', 'step': 8105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.245317', 'step': 8105, 'epoch': 2} {'type': 'loss', 'content': 0.08919572830200195, 'timestamp': '2025-09-30 22:22:07.253405', 'step': 8106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:07.296793', 'step': 8106, 'epoch': 2} {'type': 'loss', 'content': 0.1738491952419281, 'timestamp': '2025-09-30 22:22:07.306771', 'step': 8107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:07.360456', 'step': 8107, 'epoch': 2} {'type': 'loss', 'content': 0.1641939878463745, 'timestamp': '2025-09-30 22:22:07.390879', 'step': 8108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.427641', 'step': 8108, 'epoch': 2} {'type': 'loss', 'content': 0.17666326463222504, 'timestamp': '2025-09-30 22:22:07.435516', 'step': 8109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:07.472684', 'step': 8109, 'epoch': 2} {'type': 'loss', 'content': 0.0998421162366867, 'timestamp': '2025-09-30 22:22:07.479617', 'step': 8110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:07.513639', 'step': 8110, 'epoch': 2} {'type': 'loss', 'content': 0.16865311563014984, 'timestamp': '2025-09-30 22:22:07.521707', 'step': 8111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:07.556661', 'step': 8111, 'epoch': 2} {'type': 'loss', 'content': 0.050020840018987656, 'timestamp': '2025-09-30 22:22:07.587919', 'step': 8112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:07.626832', 'step': 8112, 'epoch': 2} {'type': 'loss', 'content': 0.13838638365268707, 'timestamp': '2025-09-30 22:22:07.630143', 'step': 8113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.665313', 'step': 8113, 'epoch': 2} {'type': 'loss', 'content': 0.1305047571659088, 'timestamp': '2025-09-30 22:22:07.671846', 'step': 8114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:07.704249', 'step': 8114, 'epoch': 2} {'type': 'loss', 'content': 0.1767801195383072, 'timestamp': '2025-09-30 22:22:07.712058', 'step': 8115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.747909', 'step': 8115, 'epoch': 2} {'type': 'loss', 'content': 0.08411574363708496, 'timestamp': '2025-09-30 22:22:07.772169', 'step': 8116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:07.802397', 'step': 8116, 'epoch': 2} {'type': 'loss', 'content': 0.10523919761180878, 'timestamp': '2025-09-30 22:22:07.808732', 'step': 8117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.844365', 'step': 8117, 'epoch': 2} {'type': 'loss', 'content': 0.08847697079181671, 'timestamp': '2025-09-30 22:22:07.847252', 'step': 8118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:07.879488', 'step': 8118, 'epoch': 2} {'type': 'loss', 'content': 0.1071610227227211, 'timestamp': '2025-09-30 22:22:07.885092', 'step': 8119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.915609', 'step': 8119, 'epoch': 2} {'type': 'loss', 'content': 0.11618450284004211, 'timestamp': '2025-09-30 22:22:07.939535', 'step': 8120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:07.971528', 'step': 8120, 'epoch': 2} {'type': 'loss', 'content': 0.1674463003873825, 'timestamp': '2025-09-30 22:22:07.975887', 'step': 8121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.008108', 'step': 8121, 'epoch': 2} {'type': 'loss', 'content': 0.14915993809700012, 'timestamp': '2025-09-30 22:22:08.014497', 'step': 8122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.049306', 'step': 8122, 'epoch': 2} {'type': 'loss', 'content': 0.20659776031970978, 'timestamp': '2025-09-30 22:22:08.053295', 'step': 8123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.087438', 'step': 8123, 'epoch': 2} {'type': 'loss', 'content': 0.10352997481822968, 'timestamp': '2025-09-30 22:22:08.120472', 'step': 8124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.152176', 'step': 8124, 'epoch': 2} {'type': 'loss', 'content': 0.12520970404148102, 'timestamp': '2025-09-30 22:22:08.156208', 'step': 8125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:08.195395', 'step': 8125, 'epoch': 2} {'type': 'loss', 'content': 0.11973271518945694, 'timestamp': '2025-09-30 22:22:08.199917', 'step': 8126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:08.230726', 'step': 8126, 'epoch': 2} {'type': 'loss', 'content': 0.17714841663837433, 'timestamp': '2025-09-30 22:22:08.235550', 'step': 8127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:08.278030', 'step': 8127, 'epoch': 2} {'type': 'loss', 'content': 0.1315557360649109, 'timestamp': '2025-09-30 22:22:08.314287', 'step': 8128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:08.352867', 'step': 8128, 'epoch': 2} {'type': 'loss', 'content': 0.23511625826358795, 'timestamp': '2025-09-30 22:22:08.356331', 'step': 8129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:08.387776', 'step': 8129, 'epoch': 2} {'type': 'loss', 'content': 0.13244542479515076, 'timestamp': '2025-09-30 22:22:08.392495', 'step': 8130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.423567', 'step': 8130, 'epoch': 2} {'type': 'loss', 'content': 0.1153484582901001, 'timestamp': '2025-09-30 22:22:08.426920', 'step': 8131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:08.458022', 'step': 8131, 'epoch': 2} {'type': 'loss', 'content': 0.0792580246925354, 'timestamp': '2025-09-30 22:22:08.482710', 'step': 8132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:08.514127', 'step': 8132, 'epoch': 2} {'type': 'loss', 'content': 0.1347179114818573, 'timestamp': '2025-09-30 22:22:08.520446', 'step': 8133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.552270', 'step': 8133, 'epoch': 2} {'type': 'loss', 'content': 0.10852710902690887, 'timestamp': '2025-09-30 22:22:08.559366', 'step': 8134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.594940', 'step': 8134, 'epoch': 2} {'type': 'loss', 'content': 0.17319804430007935, 'timestamp': '2025-09-30 22:22:08.606361', 'step': 8135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:08.643845', 'step': 8135, 'epoch': 2} {'type': 'loss', 'content': 0.08342128992080688, 'timestamp': '2025-09-30 22:22:08.678038', 'step': 8136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.720158', 'step': 8136, 'epoch': 2} {'type': 'loss', 'content': 0.11396628618240356, 'timestamp': '2025-09-30 22:22:08.732638', 'step': 8137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:08.772068', 'step': 8137, 'epoch': 2} {'type': 'loss', 'content': 0.14055505394935608, 'timestamp': '2025-09-30 22:22:08.783755', 'step': 8138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:08.816702', 'step': 8138, 'epoch': 2} {'type': 'loss', 'content': 0.12688878178596497, 'timestamp': '2025-09-30 22:22:08.828715', 'step': 8139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:08.867556', 'step': 8139, 'epoch': 2} {'type': 'loss', 'content': 0.11746899038553238, 'timestamp': '2025-09-30 22:22:08.897955', 'step': 8140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:08.937392', 'step': 8140, 'epoch': 2} {'type': 'loss', 'content': 0.11545519530773163, 'timestamp': '2025-09-30 22:22:08.947326', 'step': 8141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:08.979587', 'step': 8141, 'epoch': 2} {'type': 'loss', 'content': 0.12988969683647156, 'timestamp': '2025-09-30 22:22:08.989394', 'step': 8142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.026409', 'step': 8142, 'epoch': 2} {'type': 'loss', 'content': 0.13950324058532715, 'timestamp': '2025-09-30 22:22:09.037880', 'step': 8143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.069835', 'step': 8143, 'epoch': 2} {'type': 'loss', 'content': 0.07529658079147339, 'timestamp': '2025-09-30 22:22:09.102949', 'step': 8144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.142195', 'step': 8144, 'epoch': 2} {'type': 'loss', 'content': 0.1490047127008438, 'timestamp': '2025-09-30 22:22:09.154049', 'step': 8145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.194343', 'step': 8145, 'epoch': 2} {'type': 'loss', 'content': 0.13259337842464447, 'timestamp': '2025-09-30 22:22:09.199108', 'step': 8146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.234510', 'step': 8146, 'epoch': 2} {'type': 'loss', 'content': 0.07679757475852966, 'timestamp': '2025-09-30 22:22:09.237748', 'step': 8147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:09.273920', 'step': 8147, 'epoch': 2} {'type': 'loss', 'content': 0.08347257971763611, 'timestamp': '2025-09-30 22:22:09.303780', 'step': 8148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.334854', 'step': 8148, 'epoch': 2} {'type': 'loss', 'content': 0.16087126731872559, 'timestamp': '2025-09-30 22:22:09.343365', 'step': 8149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:09.378760', 'step': 8149, 'epoch': 2} {'type': 'loss', 'content': 0.16810810565948486, 'timestamp': '2025-09-30 22:22:09.386309', 'step': 8150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.421691', 'step': 8150, 'epoch': 2} {'type': 'loss', 'content': 0.11641932278871536, 'timestamp': '2025-09-30 22:22:09.424817', 'step': 8151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.459342', 'step': 8151, 'epoch': 2} {'type': 'loss', 'content': 0.14219699800014496, 'timestamp': '2025-09-30 22:22:09.484862', 'step': 8152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.516517', 'step': 8152, 'epoch': 2} {'type': 'loss', 'content': 0.15955850481987, 'timestamp': '2025-09-30 22:22:09.527338', 'step': 8153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.567600', 'step': 8153, 'epoch': 2} {'type': 'loss', 'content': 0.2422197312116623, 'timestamp': '2025-09-30 22:22:09.578742', 'step': 8154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:09.617068', 'step': 8154, 'epoch': 2} {'type': 'loss', 'content': 0.1673697829246521, 'timestamp': '2025-09-30 22:22:09.626445', 'step': 8155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:09.663767', 'step': 8155, 'epoch': 2} {'type': 'loss', 'content': 0.11545303463935852, 'timestamp': '2025-09-30 22:22:09.696176', 'step': 8156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.727136', 'step': 8156, 'epoch': 2} {'type': 'loss', 'content': 0.12281282991170883, 'timestamp': '2025-09-30 22:22:09.732247', 'step': 8157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:09.764899', 'step': 8157, 'epoch': 2} {'type': 'loss', 'content': 0.104571633040905, 'timestamp': '2025-09-30 22:22:09.767727', 'step': 8158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.812153', 'step': 8158, 'epoch': 2} {'type': 'loss', 'content': 0.13951657712459564, 'timestamp': '2025-09-30 22:22:09.816508', 'step': 8159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.848768', 'step': 8159, 'epoch': 2} {'type': 'loss', 'content': 0.0710088387131691, 'timestamp': '2025-09-30 22:22:09.873240', 'step': 8160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:09.903556', 'step': 8160, 'epoch': 2} {'type': 'loss', 'content': 0.07656066119670868, 'timestamp': '2025-09-30 22:22:09.915928', 'step': 8161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:09.948242', 'step': 8161, 'epoch': 2} {'type': 'loss', 'content': 0.04767049476504326, 'timestamp': '2025-09-30 22:22:09.961594', 'step': 8162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:09.992627', 'step': 8162, 'epoch': 2} {'type': 'loss', 'content': 0.1390741765499115, 'timestamp': '2025-09-30 22:22:10.001197', 'step': 8163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:10.038015', 'step': 8163, 'epoch': 2} {'type': 'loss', 'content': 0.21570682525634766, 'timestamp': '2025-09-30 22:22:10.069338', 'step': 8164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:10.100753', 'step': 8164, 'epoch': 2} {'type': 'loss', 'content': 0.13950031995773315, 'timestamp': '2025-09-30 22:22:10.105168', 'step': 8165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.137307', 'step': 8165, 'epoch': 2} {'type': 'loss', 'content': 0.1644243597984314, 'timestamp': '2025-09-30 22:22:10.140472', 'step': 8166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:10.174606', 'step': 8166, 'epoch': 2} {'type': 'loss', 'content': 0.22535409033298492, 'timestamp': '2025-09-30 22:22:10.179420', 'step': 8167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.214193', 'step': 8167, 'epoch': 2} {'type': 'loss', 'content': 0.09756458550691605, 'timestamp': '2025-09-30 22:22:10.248478', 'step': 8168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:10.282159', 'step': 8168, 'epoch': 2} {'type': 'loss', 'content': 0.08396784961223602, 'timestamp': '2025-09-30 22:22:10.292963', 'step': 8169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.335128', 'step': 8169, 'epoch': 2} {'type': 'loss', 'content': 0.1156640276312828, 'timestamp': '2025-09-30 22:22:10.339090', 'step': 8170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.380906', 'step': 8170, 'epoch': 2} {'type': 'loss', 'content': 0.1470857858657837, 'timestamp': '2025-09-30 22:22:10.389895', 'step': 8171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:10.427249', 'step': 8171, 'epoch': 2} {'type': 'loss', 'content': 0.11689573526382446, 'timestamp': '2025-09-30 22:22:10.457060', 'step': 8172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:10.495646', 'step': 8172, 'epoch': 2} {'type': 'loss', 'content': 0.11601370573043823, 'timestamp': '2025-09-30 22:22:10.504534', 'step': 8173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.536125', 'step': 8173, 'epoch': 2} {'type': 'loss', 'content': 0.147308349609375, 'timestamp': '2025-09-30 22:22:10.545693', 'step': 8174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.577191', 'step': 8174, 'epoch': 2} {'type': 'loss', 'content': 0.08351248502731323, 'timestamp': '2025-09-30 22:22:10.589110', 'step': 8175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:10.627935', 'step': 8175, 'epoch': 2} {'type': 'loss', 'content': 0.08369161188602448, 'timestamp': '2025-09-30 22:22:10.660262', 'step': 8176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:10.698279', 'step': 8176, 'epoch': 2} {'type': 'loss', 'content': 0.0402999222278595, 'timestamp': '2025-09-30 22:22:10.711270', 'step': 8177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.744107', 'step': 8177, 'epoch': 2} {'type': 'loss', 'content': 0.16355502605438232, 'timestamp': '2025-09-30 22:22:10.752780', 'step': 8178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:10.783265', 'step': 8178, 'epoch': 2} {'type': 'loss', 'content': 0.15421365201473236, 'timestamp': '2025-09-30 22:22:10.792431', 'step': 8179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:10.823105', 'step': 8179, 'epoch': 2} {'type': 'loss', 'content': 0.056122031062841415, 'timestamp': '2025-09-30 22:22:10.847562', 'step': 8180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:10.885521', 'step': 8180, 'epoch': 2} {'type': 'loss', 'content': 0.07234405726194382, 'timestamp': '2025-09-30 22:22:10.897057', 'step': 8181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:10.938673', 'step': 8181, 'epoch': 2} {'type': 'loss', 'content': 0.15960091352462769, 'timestamp': '2025-09-30 22:22:10.942251', 'step': 8182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:10.973711', 'step': 8182, 'epoch': 2} {'type': 'loss', 'content': 0.13524353504180908, 'timestamp': '2025-09-30 22:22:10.979734', 'step': 8183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:11.016278', 'step': 8183, 'epoch': 2} {'type': 'loss', 'content': 0.04317564144730568, 'timestamp': '2025-09-30 22:22:11.040513', 'step': 8184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:11.072073', 'step': 8184, 'epoch': 2} {'type': 'loss', 'content': 0.13715237379074097, 'timestamp': '2025-09-30 22:22:11.079115', 'step': 8185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:11.113600', 'step': 8185, 'epoch': 2} {'type': 'loss', 'content': 0.1386931836605072, 'timestamp': '2025-09-30 22:22:11.119171', 'step': 8186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:11.153193', 'step': 8186, 'epoch': 2} {'type': 'loss', 'content': 0.14035917818546295, 'timestamp': '2025-09-30 22:22:11.160262', 'step': 8187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:11.195372', 'step': 8187, 'epoch': 2} {'type': 'loss', 'content': 0.06230070814490318, 'timestamp': '2025-09-30 22:22:11.224466', 'step': 8188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.259413', 'step': 8188, 'epoch': 2} {'type': 'loss', 'content': 0.11620020121335983, 'timestamp': '2025-09-30 22:22:11.269890', 'step': 8189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:11.309690', 'step': 8189, 'epoch': 2} {'type': 'loss', 'content': 0.15937092900276184, 'timestamp': '2025-09-30 22:22:11.320730', 'step': 8190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.359175', 'step': 8190, 'epoch': 2} {'type': 'loss', 'content': 0.12443143129348755, 'timestamp': '2025-09-30 22:22:11.362567', 'step': 8191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:11.399672', 'step': 8191, 'epoch': 2} {'type': 'loss', 'content': 0.10293088108301163, 'timestamp': '2025-09-30 22:22:11.429925', 'step': 8192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.464454', 'step': 8192, 'epoch': 2} {'type': 'loss', 'content': 0.15576256811618805, 'timestamp': '2025-09-30 22:22:11.471242', 'step': 8193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:11.505278', 'step': 8193, 'epoch': 2} {'type': 'loss', 'content': 0.16866609454154968, 'timestamp': '2025-09-30 22:22:11.514155', 'step': 8194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.550493', 'step': 8194, 'epoch': 2} {'type': 'loss', 'content': 0.1912636011838913, 'timestamp': '2025-09-30 22:22:11.562954', 'step': 8195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:11.599488', 'step': 8195, 'epoch': 2} {'type': 'loss', 'content': 0.06435608863830566, 'timestamp': '2025-09-30 22:22:11.628901', 'step': 8196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:11.666350', 'step': 8196, 'epoch': 2} {'type': 'loss', 'content': 0.13202181458473206, 'timestamp': '2025-09-30 22:22:11.673179', 'step': 8197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.707618', 'step': 8197, 'epoch': 2} {'type': 'loss', 'content': 0.0957566574215889, 'timestamp': '2025-09-30 22:22:11.714526', 'step': 8198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:11.750493', 'step': 8198, 'epoch': 2} {'type': 'loss', 'content': 0.1262868344783783, 'timestamp': '2025-09-30 22:22:11.756108', 'step': 8199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:11.792701', 'step': 8199, 'epoch': 2} {'type': 'loss', 'content': 0.048515915870666504, 'timestamp': '2025-09-30 22:22:11.821606', 'step': 8200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.858723', 'step': 8200, 'epoch': 2} {'type': 'loss', 'content': 0.10928259789943695, 'timestamp': '2025-09-30 22:22:11.867109', 'step': 8201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:11.902783', 'step': 8201, 'epoch': 2} {'type': 'loss', 'content': 0.09634018689393997, 'timestamp': '2025-09-30 22:22:11.906426', 'step': 8202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:11.942736', 'step': 8202, 'epoch': 2} {'type': 'loss', 'content': 0.23192663490772247, 'timestamp': '2025-09-30 22:22:11.954347', 'step': 8203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:11.992580', 'step': 8203, 'epoch': 2} {'type': 'loss', 'content': 0.16758468747138977, 'timestamp': '2025-09-30 22:22:12.024367', 'step': 8204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.060699', 'step': 8204, 'epoch': 2} {'type': 'loss', 'content': 0.059189651161432266, 'timestamp': '2025-09-30 22:22:12.070675', 'step': 8205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:12.107756', 'step': 8205, 'epoch': 2} {'type': 'loss', 'content': 0.09779791533946991, 'timestamp': '2025-09-30 22:22:12.117859', 'step': 8206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:12.155393', 'step': 8206, 'epoch': 2} {'type': 'loss', 'content': 0.15324625372886658, 'timestamp': '2025-09-30 22:22:12.162446', 'step': 8207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.198449', 'step': 8207, 'epoch': 2} {'type': 'loss', 'content': 0.15011484920978546, 'timestamp': '2025-09-30 22:22:12.223247', 'step': 8208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:12.261542', 'step': 8208, 'epoch': 2} {'type': 'loss', 'content': 0.12152419984340668, 'timestamp': '2025-09-30 22:22:12.271312', 'step': 8209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:12.310948', 'step': 8209, 'epoch': 2} {'type': 'loss', 'content': 0.13815666735172272, 'timestamp': '2025-09-30 22:22:12.323799', 'step': 8210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.362959', 'step': 8210, 'epoch': 2} {'type': 'loss', 'content': 0.1575065553188324, 'timestamp': '2025-09-30 22:22:12.375986', 'step': 8211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.417657', 'step': 8211, 'epoch': 2} {'type': 'loss', 'content': 0.1609194576740265, 'timestamp': '2025-09-30 22:22:12.450185', 'step': 8212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:12.487746', 'step': 8212, 'epoch': 2} {'type': 'loss', 'content': 0.15801316499710083, 'timestamp': '2025-09-30 22:22:12.495170', 'step': 8213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:12.526348', 'step': 8213, 'epoch': 2} {'type': 'loss', 'content': 0.19283227622509003, 'timestamp': '2025-09-30 22:22:12.536218', 'step': 8214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:12.575166', 'step': 8214, 'epoch': 2} {'type': 'loss', 'content': 0.12477876991033554, 'timestamp': '2025-09-30 22:22:12.584669', 'step': 8215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:12.622942', 'step': 8215, 'epoch': 2} {'type': 'loss', 'content': 0.08832160383462906, 'timestamp': '2025-09-30 22:22:12.653994', 'step': 8216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:12.684325', 'step': 8216, 'epoch': 2} {'type': 'loss', 'content': 0.13195601105690002, 'timestamp': '2025-09-30 22:22:12.691749', 'step': 8217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.727773', 'step': 8217, 'epoch': 2} {'type': 'loss', 'content': 0.2713773548603058, 'timestamp': '2025-09-30 22:22:12.738222', 'step': 8218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:12.774161', 'step': 8218, 'epoch': 2} {'type': 'loss', 'content': 0.043821800500154495, 'timestamp': '2025-09-30 22:22:12.785568', 'step': 8219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.824827', 'step': 8219, 'epoch': 2} {'type': 'loss', 'content': 0.07662739604711533, 'timestamp': '2025-09-30 22:22:12.851146', 'step': 8220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.885830', 'step': 8220, 'epoch': 2} {'type': 'loss', 'content': 0.07260052859783173, 'timestamp': '2025-09-30 22:22:12.893826', 'step': 8221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:12.929992', 'step': 8221, 'epoch': 2} {'type': 'loss', 'content': 0.09620936959981918, 'timestamp': '2025-09-30 22:22:12.937714', 'step': 8222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:12.975680', 'step': 8222, 'epoch': 2} {'type': 'loss', 'content': 0.19883818924427032, 'timestamp': '2025-09-30 22:22:12.982807', 'step': 8223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:13.021449', 'step': 8223, 'epoch': 2} {'type': 'loss', 'content': 0.055964574217796326, 'timestamp': '2025-09-30 22:22:13.046223', 'step': 8224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:13.080844', 'step': 8224, 'epoch': 2} {'type': 'loss', 'content': 0.1995312124490738, 'timestamp': '2025-09-30 22:22:13.088226', 'step': 8225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:13.123156', 'step': 8225, 'epoch': 2} {'type': 'loss', 'content': 0.1653822660446167, 'timestamp': '2025-09-30 22:22:13.131779', 'step': 8226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:13.167075', 'step': 8226, 'epoch': 2} {'type': 'loss', 'content': 0.06719879806041718, 'timestamp': '2025-09-30 22:22:13.174799', 'step': 8227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:13.215163', 'step': 8227, 'epoch': 2} {'type': 'loss', 'content': 0.14413009583950043, 'timestamp': '2025-09-30 22:22:13.245027', 'step': 8228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:13.281727', 'step': 8228, 'epoch': 2} {'type': 'loss', 'content': 0.13106170296669006, 'timestamp': '2025-09-30 22:22:13.292612', 'step': 8229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:13.331880', 'step': 8229, 'epoch': 2} {'type': 'loss', 'content': 0.0978713408112526, 'timestamp': '2025-09-30 22:22:13.341635', 'step': 8230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:13.379337', 'step': 8230, 'epoch': 2} {'type': 'loss', 'content': 0.1132109984755516, 'timestamp': '2025-09-30 22:22:13.390988', 'step': 8231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:13.432418', 'step': 8231, 'epoch': 2} {'type': 'loss', 'content': 0.04006722569465637, 'timestamp': '2025-09-30 22:22:13.465319', 'step': 8232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:13.504471', 'step': 8232, 'epoch': 2} {'type': 'loss', 'content': 0.10803699493408203, 'timestamp': '2025-09-30 22:22:13.515909', 'step': 8233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:13.553390', 'step': 8233, 'epoch': 2} {'type': 'loss', 'content': 0.12701277434825897, 'timestamp': '2025-09-30 22:22:13.562939', 'step': 8234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:13.598691', 'step': 8234, 'epoch': 2} {'type': 'loss', 'content': 0.06726760417222977, 'timestamp': '2025-09-30 22:22:13.605531', 'step': 8235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:13.641521', 'step': 8235, 'epoch': 2} {'type': 'loss', 'content': 0.1066897064447403, 'timestamp': '2025-09-30 22:22:13.669507', 'step': 8236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:13.706672', 'step': 8236, 'epoch': 2} {'type': 'loss', 'content': 0.18761730194091797, 'timestamp': '2025-09-30 22:22:13.719110', 'step': 8237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:13.754113', 'step': 8237, 'epoch': 2} {'type': 'loss', 'content': 0.04378179460763931, 'timestamp': '2025-09-30 22:22:13.762151', 'step': 8238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:13.799265', 'step': 8238, 'epoch': 2} {'type': 'loss', 'content': 0.13870564103126526, 'timestamp': '2025-09-30 22:22:13.810347', 'step': 8239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:13.842556', 'step': 8239, 'epoch': 2} {'type': 'loss', 'content': 0.16899311542510986, 'timestamp': '2025-09-30 22:22:13.874320', 'step': 8240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:13.913330', 'step': 8240, 'epoch': 2} {'type': 'loss', 'content': 0.06724093109369278, 'timestamp': '2025-09-30 22:22:13.923672', 'step': 8241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:13.962034', 'step': 8241, 'epoch': 2} {'type': 'loss', 'content': 0.11611364781856537, 'timestamp': '2025-09-30 22:22:13.975543', 'step': 8242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:14.015700', 'step': 8242, 'epoch': 2} {'type': 'loss', 'content': 0.1030861884355545, 'timestamp': '2025-09-30 22:22:14.019755', 'step': 8243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:14.059872', 'step': 8243, 'epoch': 2} {'type': 'loss', 'content': 0.12349613010883331, 'timestamp': '2025-09-30 22:22:14.093431', 'step': 8244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.126650', 'step': 8244, 'epoch': 2} {'type': 'loss', 'content': 0.08923478424549103, 'timestamp': '2025-09-30 22:22:14.135476', 'step': 8245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.170985', 'step': 8245, 'epoch': 2} {'type': 'loss', 'content': 0.186308816075325, 'timestamp': '2025-09-30 22:22:14.178827', 'step': 8246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:14.209586', 'step': 8246, 'epoch': 2} {'type': 'loss', 'content': 0.15429046750068665, 'timestamp': '2025-09-30 22:22:14.215744', 'step': 8247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:14.249162', 'step': 8247, 'epoch': 2} {'type': 'loss', 'content': 0.07772176712751389, 'timestamp': '2025-09-30 22:22:14.279167', 'step': 8248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.317582', 'step': 8248, 'epoch': 2} {'type': 'loss', 'content': 0.06499277055263519, 'timestamp': '2025-09-30 22:22:14.326477', 'step': 8249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:14.363452', 'step': 8249, 'epoch': 2} {'type': 'loss', 'content': 0.19364969432353973, 'timestamp': '2025-09-30 22:22:14.370212', 'step': 8250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:14.408474', 'step': 8250, 'epoch': 2} {'type': 'loss', 'content': 0.16404947638511658, 'timestamp': '2025-09-30 22:22:14.419328', 'step': 8251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.461146', 'step': 8251, 'epoch': 2} {'type': 'loss', 'content': 0.10444942116737366, 'timestamp': '2025-09-30 22:22:14.486548', 'step': 8252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.520095', 'step': 8252, 'epoch': 2} {'type': 'loss', 'content': 0.11530698090791702, 'timestamp': '2025-09-30 22:22:14.531129', 'step': 8253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:14.566925', 'step': 8253, 'epoch': 2} {'type': 'loss', 'content': 0.18091632425785065, 'timestamp': '2025-09-30 22:22:14.570705', 'step': 8254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:14.606468', 'step': 8254, 'epoch': 2} {'type': 'loss', 'content': 0.06926900893449783, 'timestamp': '2025-09-30 22:22:14.613114', 'step': 8255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.647454', 'step': 8255, 'epoch': 2} {'type': 'loss', 'content': 0.10084760934114456, 'timestamp': '2025-09-30 22:22:14.675529', 'step': 8256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:14.709392', 'step': 8256, 'epoch': 2} {'type': 'loss', 'content': 0.07876751571893692, 'timestamp': '2025-09-30 22:22:14.717759', 'step': 8257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:14.754854', 'step': 8257, 'epoch': 2} {'type': 'loss', 'content': 0.06607960164546967, 'timestamp': '2025-09-30 22:22:14.757741', 'step': 8258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:14.788431', 'step': 8258, 'epoch': 2} {'type': 'loss', 'content': 0.12818853557109833, 'timestamp': '2025-09-30 22:22:14.794952', 'step': 8259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.830142', 'step': 8259, 'epoch': 2} {'type': 'loss', 'content': 0.18953321874141693, 'timestamp': '2025-09-30 22:22:14.856965', 'step': 8260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:14.891984', 'step': 8260, 'epoch': 2} {'type': 'loss', 'content': 0.15422706305980682, 'timestamp': '2025-09-30 22:22:14.902074', 'step': 8261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:14.936814', 'step': 8261, 'epoch': 2} {'type': 'loss', 'content': 0.11444146931171417, 'timestamp': '2025-09-30 22:22:14.951823', 'step': 8262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:14.989982', 'step': 8262, 'epoch': 2} {'type': 'loss', 'content': 0.21483492851257324, 'timestamp': '2025-09-30 22:22:14.995823', 'step': 8263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:15.038837', 'step': 8263, 'epoch': 2} {'type': 'loss', 'content': 0.08985088765621185, 'timestamp': '2025-09-30 22:22:15.064046', 'step': 8264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.095529', 'step': 8264, 'epoch': 2} {'type': 'loss', 'content': 0.04829218611121178, 'timestamp': '2025-09-30 22:22:15.103417', 'step': 8265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.137542', 'step': 8265, 'epoch': 2} {'type': 'loss', 'content': 0.14459490776062012, 'timestamp': '2025-09-30 22:22:15.144277', 'step': 8266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:15.180342', 'step': 8266, 'epoch': 2} {'type': 'loss', 'content': 0.13400641083717346, 'timestamp': '2025-09-30 22:22:15.187162', 'step': 8267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:15.220469', 'step': 8267, 'epoch': 2} {'type': 'loss', 'content': 0.16970959305763245, 'timestamp': '2025-09-30 22:22:15.252381', 'step': 8268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.289286', 'step': 8268, 'epoch': 2} {'type': 'loss', 'content': 0.14416582882404327, 'timestamp': '2025-09-30 22:22:15.297388', 'step': 8269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.340492', 'step': 8269, 'epoch': 2} {'type': 'loss', 'content': 0.11195076256990433, 'timestamp': '2025-09-30 22:22:15.350104', 'step': 8270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.388298', 'step': 8270, 'epoch': 2} {'type': 'loss', 'content': 0.09240282326936722, 'timestamp': '2025-09-30 22:22:15.396508', 'step': 8271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:15.432307', 'step': 8271, 'epoch': 2} {'type': 'loss', 'content': 0.19713015854358673, 'timestamp': '2025-09-30 22:22:15.458493', 'step': 8272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:15.490804', 'step': 8272, 'epoch': 2} {'type': 'loss', 'content': 0.061806123703718185, 'timestamp': '2025-09-30 22:22:15.500786', 'step': 8273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.535809', 'step': 8273, 'epoch': 2} {'type': 'loss', 'content': 0.12754003703594208, 'timestamp': '2025-09-30 22:22:15.546781', 'step': 8274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:15.583988', 'step': 8274, 'epoch': 2} {'type': 'loss', 'content': 0.08002878725528717, 'timestamp': '2025-09-30 22:22:15.591700', 'step': 8275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.630957', 'step': 8275, 'epoch': 2} {'type': 'loss', 'content': 0.08223655074834824, 'timestamp': '2025-09-30 22:22:15.660319', 'step': 8276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:15.697388', 'step': 8276, 'epoch': 2} {'type': 'loss', 'content': 0.1714211255311966, 'timestamp': '2025-09-30 22:22:15.703102', 'step': 8277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:15.736563', 'step': 8277, 'epoch': 2} {'type': 'loss', 'content': 0.12069536000490189, 'timestamp': '2025-09-30 22:22:15.748337', 'step': 8278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:15.785834', 'step': 8278, 'epoch': 2} {'type': 'loss', 'content': 0.14987923204898834, 'timestamp': '2025-09-30 22:22:15.792224', 'step': 8279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.825637', 'step': 8279, 'epoch': 2} {'type': 'loss', 'content': 0.11591865867376328, 'timestamp': '2025-09-30 22:22:15.849832', 'step': 8280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:15.885172', 'step': 8280, 'epoch': 2} {'type': 'loss', 'content': 0.1435113251209259, 'timestamp': '2025-09-30 22:22:15.892225', 'step': 8281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:15.925804', 'step': 8281, 'epoch': 2} {'type': 'loss', 'content': 0.09687953442335129, 'timestamp': '2025-09-30 22:22:15.932824', 'step': 8282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:15.967761', 'step': 8282, 'epoch': 2} {'type': 'loss', 'content': 0.09542188793420792, 'timestamp': '2025-09-30 22:22:15.977225', 'step': 8283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.013048', 'step': 8283, 'epoch': 2} {'type': 'loss', 'content': 0.10179992765188217, 'timestamp': '2025-09-30 22:22:16.044368', 'step': 8284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:16.075775', 'step': 8284, 'epoch': 2} {'type': 'loss', 'content': 0.20443329215049744, 'timestamp': '2025-09-30 22:22:16.079586', 'step': 8285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:16.110784', 'step': 8285, 'epoch': 2} {'type': 'loss', 'content': 0.08997749537229538, 'timestamp': '2025-09-30 22:22:16.114986', 'step': 8286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.147086', 'step': 8286, 'epoch': 2} {'type': 'loss', 'content': 0.26380881667137146, 'timestamp': '2025-09-30 22:22:16.150850', 'step': 8287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:16.182344', 'step': 8287, 'epoch': 2} {'type': 'loss', 'content': 0.161960631608963, 'timestamp': '2025-09-30 22:22:16.206733', 'step': 8288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:16.238515', 'step': 8288, 'epoch': 2} {'type': 'loss', 'content': 0.1117381751537323, 'timestamp': '2025-09-30 22:22:16.249990', 'step': 8289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:16.292103', 'step': 8289, 'epoch': 2} {'type': 'loss', 'content': 0.1066426932811737, 'timestamp': '2025-09-30 22:22:16.296102', 'step': 8290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.328333', 'step': 8290, 'epoch': 2} {'type': 'loss', 'content': 0.1558985859155655, 'timestamp': '2025-09-30 22:22:16.331724', 'step': 8291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.363312', 'step': 8291, 'epoch': 2} {'type': 'loss', 'content': 0.12419313192367554, 'timestamp': '2025-09-30 22:22:16.388449', 'step': 8292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.421123', 'step': 8292, 'epoch': 2} {'type': 'loss', 'content': 0.06855928152799606, 'timestamp': '2025-09-30 22:22:16.424511', 'step': 8293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:16.455894', 'step': 8293, 'epoch': 2} {'type': 'loss', 'content': 0.11423363536596298, 'timestamp': '2025-09-30 22:22:16.459846', 'step': 8294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:16.489827', 'step': 8294, 'epoch': 2} {'type': 'loss', 'content': 0.0697949007153511, 'timestamp': '2025-09-30 22:22:16.493331', 'step': 8295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:16.524281', 'step': 8295, 'epoch': 2} {'type': 'loss', 'content': 0.08264487236738205, 'timestamp': '2025-09-30 22:22:16.549140', 'step': 8296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.581782', 'step': 8296, 'epoch': 2} {'type': 'loss', 'content': 0.159148171544075, 'timestamp': '2025-09-30 22:22:16.585852', 'step': 8297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:16.618367', 'step': 8297, 'epoch': 2} {'type': 'loss', 'content': 0.12491629272699356, 'timestamp': '2025-09-30 22:22:16.623326', 'step': 8298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:16.653632', 'step': 8298, 'epoch': 2} {'type': 'loss', 'content': 0.2491813600063324, 'timestamp': '2025-09-30 22:22:16.656338', 'step': 8299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.687309', 'step': 8299, 'epoch': 2} {'type': 'loss', 'content': 0.1603153496980667, 'timestamp': '2025-09-30 22:22:16.713675', 'step': 8300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:16.745813', 'step': 8300, 'epoch': 2} {'type': 'loss', 'content': 0.16343456506729126, 'timestamp': '2025-09-30 22:22:16.750292', 'step': 8301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.782079', 'step': 8301, 'epoch': 2} {'type': 'loss', 'content': 0.056319817900657654, 'timestamp': '2025-09-30 22:22:16.785677', 'step': 8302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.817488', 'step': 8302, 'epoch': 2} {'type': 'loss', 'content': 0.16240860521793365, 'timestamp': '2025-09-30 22:22:16.820112', 'step': 8303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.851964', 'step': 8303, 'epoch': 2} {'type': 'loss', 'content': 0.1419106423854828, 'timestamp': '2025-09-30 22:22:16.877912', 'step': 8304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:16.909024', 'step': 8304, 'epoch': 2} {'type': 'loss', 'content': 0.12264753133058548, 'timestamp': '2025-09-30 22:22:16.912847', 'step': 8305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:16.943667', 'step': 8305, 'epoch': 2} {'type': 'loss', 'content': 0.1387142390012741, 'timestamp': '2025-09-30 22:22:16.946463', 'step': 8306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:16.977352', 'step': 8306, 'epoch': 2} {'type': 'loss', 'content': 0.09320531040430069, 'timestamp': '2025-09-30 22:22:16.981522', 'step': 8307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:17.013263', 'step': 8307, 'epoch': 2} {'type': 'loss', 'content': 0.14027616381645203, 'timestamp': '2025-09-30 22:22:17.039753', 'step': 8308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:17.070200', 'step': 8308, 'epoch': 2} {'type': 'loss', 'content': 0.1046290397644043, 'timestamp': '2025-09-30 22:22:17.073583', 'step': 8309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:17.104669', 'step': 8309, 'epoch': 2} {'type': 'loss', 'content': 0.09377995878458023, 'timestamp': '2025-09-30 22:22:17.108052', 'step': 8310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.139569', 'step': 8310, 'epoch': 2} {'type': 'loss', 'content': 0.1386924535036087, 'timestamp': '2025-09-30 22:22:17.143668', 'step': 8311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:17.177160', 'step': 8311, 'epoch': 2} {'type': 'loss', 'content': 0.10112544149160385, 'timestamp': '2025-09-30 22:22:17.202078', 'step': 8312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:17.237096', 'step': 8312, 'epoch': 2} {'type': 'loss', 'content': 0.023275010287761688, 'timestamp': '2025-09-30 22:22:17.240818', 'step': 8313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:17.272424', 'step': 8313, 'epoch': 2} {'type': 'loss', 'content': 0.13898296654224396, 'timestamp': '2025-09-30 22:22:17.278364', 'step': 8314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.310090', 'step': 8314, 'epoch': 2} {'type': 'loss', 'content': 0.14330901205539703, 'timestamp': '2025-09-30 22:22:17.314996', 'step': 8315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:17.347722', 'step': 8315, 'epoch': 2} {'type': 'loss', 'content': 0.1375233381986618, 'timestamp': '2025-09-30 22:22:17.373589', 'step': 8316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:17.404570', 'step': 8316, 'epoch': 2} {'type': 'loss', 'content': 0.10668688267469406, 'timestamp': '2025-09-30 22:22:17.409231', 'step': 8317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:17.441841', 'step': 8317, 'epoch': 2} {'type': 'loss', 'content': 0.17144837975502014, 'timestamp': '2025-09-30 22:22:17.445574', 'step': 8318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.477444', 'step': 8318, 'epoch': 2} {'type': 'loss', 'content': 0.05714043229818344, 'timestamp': '2025-09-30 22:22:17.480990', 'step': 8319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:17.512707', 'step': 8319, 'epoch': 2} {'type': 'loss', 'content': 0.11328468471765518, 'timestamp': '2025-09-30 22:22:17.538325', 'step': 8320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:17.569626', 'step': 8320, 'epoch': 2} {'type': 'loss', 'content': 0.07524058222770691, 'timestamp': '2025-09-30 22:22:17.573575', 'step': 8321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.605279', 'step': 8321, 'epoch': 2} {'type': 'loss', 'content': 0.05102965608239174, 'timestamp': '2025-09-30 22:22:17.609130', 'step': 8322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:17.641442', 'step': 8322, 'epoch': 2} {'type': 'loss', 'content': 0.2220991998910904, 'timestamp': '2025-09-30 22:22:17.645625', 'step': 8323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:17.676185', 'step': 8323, 'epoch': 2} {'type': 'loss', 'content': 0.08163155615329742, 'timestamp': '2025-09-30 22:22:17.701493', 'step': 8324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.735416', 'step': 8324, 'epoch': 2} {'type': 'loss', 'content': 0.17035746574401855, 'timestamp': '2025-09-30 22:22:17.741491', 'step': 8325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.771633', 'step': 8325, 'epoch': 2} {'type': 'loss', 'content': 0.17082209885120392, 'timestamp': '2025-09-30 22:22:17.775424', 'step': 8326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:17.806640', 'step': 8326, 'epoch': 2} {'type': 'loss', 'content': 0.11397279053926468, 'timestamp': '2025-09-30 22:22:17.810668', 'step': 8327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.841700', 'step': 8327, 'epoch': 2} {'type': 'loss', 'content': 0.18819718062877655, 'timestamp': '2025-09-30 22:22:17.867558', 'step': 8328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:17.899468', 'step': 8328, 'epoch': 2} {'type': 'loss', 'content': 0.12064170837402344, 'timestamp': '2025-09-30 22:22:17.902420', 'step': 8329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.934538', 'step': 8329, 'epoch': 2} {'type': 'loss', 'content': 0.1465628296136856, 'timestamp': '2025-09-30 22:22:17.940234', 'step': 8330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:17.977952', 'step': 8330, 'epoch': 2} {'type': 'loss', 'content': 0.061906542629003525, 'timestamp': '2025-09-30 22:22:17.983125', 'step': 8331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:18.016289', 'step': 8331, 'epoch': 2} {'type': 'loss', 'content': 0.131203293800354, 'timestamp': '2025-09-30 22:22:18.044445', 'step': 8332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:18.077681', 'step': 8332, 'epoch': 2} {'type': 'loss', 'content': 0.09152808785438538, 'timestamp': '2025-09-30 22:22:18.082555', 'step': 8333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.115659', 'step': 8333, 'epoch': 2} {'type': 'loss', 'content': 0.21145638823509216, 'timestamp': '2025-09-30 22:22:18.120998', 'step': 8334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:18.154649', 'step': 8334, 'epoch': 2} {'type': 'loss', 'content': 0.09244463592767715, 'timestamp': '2025-09-30 22:22:18.158266', 'step': 8335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.191254', 'step': 8335, 'epoch': 2} {'type': 'loss', 'content': 0.11334000527858734, 'timestamp': '2025-09-30 22:22:18.217338', 'step': 8336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:18.255782', 'step': 8336, 'epoch': 2} {'type': 'loss', 'content': 0.11539074033498764, 'timestamp': '2025-09-30 22:22:18.261372', 'step': 8337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:18.295161', 'step': 8337, 'epoch': 2} {'type': 'loss', 'content': 0.13214217126369476, 'timestamp': '2025-09-30 22:22:18.301450', 'step': 8338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.334460', 'step': 8338, 'epoch': 2} {'type': 'loss', 'content': 0.11613713949918747, 'timestamp': '2025-09-30 22:22:18.349617', 'step': 8339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.383424', 'step': 8339, 'epoch': 2} {'type': 'loss', 'content': 0.07193509489297867, 'timestamp': '2025-09-30 22:22:18.411467', 'step': 8340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:18.444181', 'step': 8340, 'epoch': 2} {'type': 'loss', 'content': 0.06877551972866058, 'timestamp': '2025-09-30 22:22:18.450829', 'step': 8341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:18.485365', 'step': 8341, 'epoch': 2} {'type': 'loss', 'content': 0.15895797312259674, 'timestamp': '2025-09-30 22:22:18.492256', 'step': 8342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:18.525870', 'step': 8342, 'epoch': 2} {'type': 'loss', 'content': 0.11047210544347763, 'timestamp': '2025-09-30 22:22:18.530154', 'step': 8343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:18.562567', 'step': 8343, 'epoch': 2} {'type': 'loss', 'content': 0.14205197989940643, 'timestamp': '2025-09-30 22:22:18.590531', 'step': 8344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.624267', 'step': 8344, 'epoch': 2} {'type': 'loss', 'content': 0.10099171102046967, 'timestamp': '2025-09-30 22:22:18.629539', 'step': 8345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:18.660350', 'step': 8345, 'epoch': 2} {'type': 'loss', 'content': 0.24842053651809692, 'timestamp': '2025-09-30 22:22:18.664231', 'step': 8346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:18.698184', 'step': 8346, 'epoch': 2} {'type': 'loss', 'content': 0.1513959914445877, 'timestamp': '2025-09-30 22:22:18.703132', 'step': 8347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.737412', 'step': 8347, 'epoch': 2} {'type': 'loss', 'content': 0.17658482491970062, 'timestamp': '2025-09-30 22:22:18.762149', 'step': 8348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.796174', 'step': 8348, 'epoch': 2} {'type': 'loss', 'content': 0.20909811556339264, 'timestamp': '2025-09-30 22:22:18.802564', 'step': 8349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:18.835726', 'step': 8349, 'epoch': 2} {'type': 'loss', 'content': 0.049209386110305786, 'timestamp': '2025-09-30 22:22:18.841470', 'step': 8350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:18.872371', 'step': 8350, 'epoch': 2} {'type': 'loss', 'content': 0.13658054172992706, 'timestamp': '2025-09-30 22:22:18.878456', 'step': 8351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:18.911384', 'step': 8351, 'epoch': 2} {'type': 'loss', 'content': 0.09774681180715561, 'timestamp': '2025-09-30 22:22:18.938298', 'step': 8352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:18.968847', 'step': 8352, 'epoch': 2} {'type': 'loss', 'content': 0.12362322956323624, 'timestamp': '2025-09-30 22:22:18.975499', 'step': 8353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.008340', 'step': 8353, 'epoch': 2} {'type': 'loss', 'content': 0.14042149484157562, 'timestamp': '2025-09-30 22:22:19.014106', 'step': 8354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:19.047718', 'step': 8354, 'epoch': 2} {'type': 'loss', 'content': 0.09403994679450989, 'timestamp': '2025-09-30 22:22:19.050831', 'step': 8355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:19.086720', 'step': 8355, 'epoch': 2} {'type': 'loss', 'content': 0.1151154488325119, 'timestamp': '2025-09-30 22:22:19.113820', 'step': 8356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:19.144491', 'step': 8356, 'epoch': 2} {'type': 'loss', 'content': 0.1246727705001831, 'timestamp': '2025-09-30 22:22:19.149652', 'step': 8357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.179936', 'step': 8357, 'epoch': 2} {'type': 'loss', 'content': 0.10436531901359558, 'timestamp': '2025-09-30 22:22:19.187217', 'step': 8358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.220268', 'step': 8358, 'epoch': 2} {'type': 'loss', 'content': 0.11332492530345917, 'timestamp': '2025-09-30 22:22:19.227714', 'step': 8359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:19.261808', 'step': 8359, 'epoch': 2} {'type': 'loss', 'content': 0.2543933093547821, 'timestamp': '2025-09-30 22:22:19.287803', 'step': 8360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.322741', 'step': 8360, 'epoch': 2} {'type': 'loss', 'content': 0.08402177691459656, 'timestamp': '2025-09-30 22:22:19.334263', 'step': 8361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:19.371950', 'step': 8361, 'epoch': 2} {'type': 'loss', 'content': 0.09919439256191254, 'timestamp': '2025-09-30 22:22:19.375595', 'step': 8362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:19.407088', 'step': 8362, 'epoch': 2} {'type': 'loss', 'content': 0.08303317427635193, 'timestamp': '2025-09-30 22:22:19.417288', 'step': 8363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:19.455134', 'step': 8363, 'epoch': 2} {'type': 'loss', 'content': 0.08798684924840927, 'timestamp': '2025-09-30 22:22:19.479099', 'step': 8364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:19.513765', 'step': 8364, 'epoch': 2} {'type': 'loss', 'content': 0.16318976879119873, 'timestamp': '2025-09-30 22:22:19.516657', 'step': 8365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:19.547069', 'step': 8365, 'epoch': 2} {'type': 'loss', 'content': 0.17360737919807434, 'timestamp': '2025-09-30 22:22:19.551512', 'step': 8366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:19.583851', 'step': 8366, 'epoch': 2} {'type': 'loss', 'content': 0.1197294220328331, 'timestamp': '2025-09-30 22:22:19.589730', 'step': 8367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.623003', 'step': 8367, 'epoch': 2} {'type': 'loss', 'content': 0.08127269893884659, 'timestamp': '2025-09-30 22:22:19.649627', 'step': 8368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:19.682288', 'step': 8368, 'epoch': 2} {'type': 'loss', 'content': 0.1465219259262085, 'timestamp': '2025-09-30 22:22:19.685357', 'step': 8369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:19.718651', 'step': 8369, 'epoch': 2} {'type': 'loss', 'content': 0.12858308851718903, 'timestamp': '2025-09-30 22:22:19.723692', 'step': 8370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:19.755036', 'step': 8370, 'epoch': 2} {'type': 'loss', 'content': 0.1833980679512024, 'timestamp': '2025-09-30 22:22:19.765925', 'step': 8371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.798323', 'step': 8371, 'epoch': 2} {'type': 'loss', 'content': 0.1293809562921524, 'timestamp': '2025-09-30 22:22:19.824917', 'step': 8372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:19.859296', 'step': 8372, 'epoch': 2} {'type': 'loss', 'content': 0.12890905141830444, 'timestamp': '2025-09-30 22:22:19.863504', 'step': 8373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:19.896872', 'step': 8373, 'epoch': 2} {'type': 'loss', 'content': 0.09344080835580826, 'timestamp': '2025-09-30 22:22:19.899526', 'step': 8374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:22:19.933985', 'step': 8374, 'epoch': 2} {'type': 'loss', 'content': 0.23473814129829407, 'timestamp': '2025-09-30 22:22:19.941274', 'step': 8375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:19.974586', 'step': 8375, 'epoch': 2} {'type': 'loss', 'content': 0.12540346384048462, 'timestamp': '2025-09-30 22:22:20.000607', 'step': 8376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:20.032898', 'step': 8376, 'epoch': 2} {'type': 'loss', 'content': 0.2095596343278885, 'timestamp': '2025-09-30 22:22:20.037181', 'step': 8377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:20.071059', 'step': 8377, 'epoch': 2} {'type': 'loss', 'content': 0.12834154069423676, 'timestamp': '2025-09-30 22:22:20.077381', 'step': 8378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:20.109837', 'step': 8378, 'epoch': 2} {'type': 'loss', 'content': 0.17929646372795105, 'timestamp': '2025-09-30 22:22:20.114480', 'step': 8379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:20.146267', 'step': 8379, 'epoch': 2} {'type': 'loss', 'content': 0.11076349020004272, 'timestamp': '2025-09-30 22:22:20.173227', 'step': 8380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:20.205481', 'step': 8380, 'epoch': 2} {'type': 'loss', 'content': 0.21057187020778656, 'timestamp': '2025-09-30 22:22:20.210842', 'step': 8381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:20.242545', 'step': 8381, 'epoch': 2} {'type': 'loss', 'content': 0.12460871785879135, 'timestamp': '2025-09-30 22:22:20.245133', 'step': 8382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:20.287464', 'step': 8382, 'epoch': 2} {'type': 'loss', 'content': 0.13109616935253143, 'timestamp': '2025-09-30 22:22:20.292639', 'step': 8383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:20.326649', 'step': 8383, 'epoch': 2} {'type': 'loss', 'content': 0.21122318506240845, 'timestamp': '2025-09-30 22:22:20.350884', 'step': 8384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:20.400629', 'step': 8384, 'epoch': 2} {'type': 'loss', 'content': 0.09773748368024826, 'timestamp': '2025-09-30 22:22:20.403491', 'step': 8385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:20.448772', 'step': 8385, 'epoch': 2} {'type': 'loss', 'content': 0.23001910746097565, 'timestamp': '2025-09-30 22:22:20.452237', 'step': 8386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:20.489175', 'step': 8386, 'epoch': 2} {'type': 'loss', 'content': 0.23369675874710083, 'timestamp': '2025-09-30 22:22:20.492105', 'step': 8387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:20.531455', 'step': 8387, 'epoch': 2} {'type': 'loss', 'content': 0.1058146208524704, 'timestamp': '2025-09-30 22:22:20.556739', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:22:28.774448', 'step': 8388, 'epoch': 2} {'type': 'pplx', 'content': 11025.523715713298, 'timestamp': '2025-09-30 22:22:28.787623', 'step': 8388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:28.825470', 'step': 8388, 'epoch': 2} {'type': 'loss', 'content': 0.10971858352422714, 'timestamp': '2025-09-30 22:22:28.837783', 'step': 8389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:28.880494', 'step': 8389, 'epoch': 2} {'type': 'loss', 'content': 0.0938129648566246, 'timestamp': '2025-09-30 22:22:28.891492', 'step': 8390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:28.930450', 'step': 8390, 'epoch': 2} {'type': 'loss', 'content': 0.07375309616327286, 'timestamp': '2025-09-30 22:22:28.941125', 'step': 8391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:28.977563', 'step': 8391, 'epoch': 2} {'type': 'loss', 'content': 0.19240230321884155, 'timestamp': '2025-09-30 22:22:29.010751', 'step': 8392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.051261', 'step': 8392, 'epoch': 2} {'type': 'loss', 'content': 0.09456197917461395, 'timestamp': '2025-09-30 22:22:29.055569', 'step': 8393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.088758', 'step': 8393, 'epoch': 2} {'type': 'loss', 'content': 0.09640331566333771, 'timestamp': '2025-09-30 22:22:29.101870', 'step': 8394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.144386', 'step': 8394, 'epoch': 2} {'type': 'loss', 'content': 0.12951619923114777, 'timestamp': '2025-09-30 22:22:29.154858', 'step': 8395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.197181', 'step': 8395, 'epoch': 2} {'type': 'loss', 'content': 0.09627116471529007, 'timestamp': '2025-09-30 22:22:29.222260', 'step': 8396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.263578', 'step': 8396, 'epoch': 2} {'type': 'loss', 'content': 0.13547436892986298, 'timestamp': '2025-09-30 22:22:29.271766', 'step': 8397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:29.309981', 'step': 8397, 'epoch': 2} {'type': 'loss', 'content': 0.18568773567676544, 'timestamp': '2025-09-30 22:22:29.319858', 'step': 8398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:29.361261', 'step': 8398, 'epoch': 2} {'type': 'loss', 'content': 0.16161812841892242, 'timestamp': '2025-09-30 22:22:29.371995', 'step': 8399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.409357', 'step': 8399, 'epoch': 2} {'type': 'loss', 'content': 0.15539616346359253, 'timestamp': '2025-09-30 22:22:29.450006', 'step': 8400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.483054', 'step': 8400, 'epoch': 2} {'type': 'loss', 'content': 0.1272885650396347, 'timestamp': '2025-09-30 22:22:29.486518', 'step': 8401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.526871', 'step': 8401, 'epoch': 2} {'type': 'loss', 'content': 0.12468215823173523, 'timestamp': '2025-09-30 22:22:29.539286', 'step': 8402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.571289', 'step': 8402, 'epoch': 2} {'type': 'loss', 'content': 0.14019468426704407, 'timestamp': '2025-09-30 22:22:29.584596', 'step': 8403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.624991', 'step': 8403, 'epoch': 2} {'type': 'loss', 'content': 0.07982275635004044, 'timestamp': '2025-09-30 22:22:29.657471', 'step': 8404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.690058', 'step': 8404, 'epoch': 2} {'type': 'loss', 'content': 0.12093855440616608, 'timestamp': '2025-09-30 22:22:29.699673', 'step': 8405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.739650', 'step': 8405, 'epoch': 2} {'type': 'loss', 'content': 0.07610920071601868, 'timestamp': '2025-09-30 22:22:29.743439', 'step': 8406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.775444', 'step': 8406, 'epoch': 2} {'type': 'loss', 'content': 0.1507730334997177, 'timestamp': '2025-09-30 22:22:29.785025', 'step': 8407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:29.816537', 'step': 8407, 'epoch': 2} {'type': 'loss', 'content': 0.1490126997232437, 'timestamp': '2025-09-30 22:22:29.842916', 'step': 8408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:29.878768', 'step': 8408, 'epoch': 2} {'type': 'loss', 'content': 0.14473745226860046, 'timestamp': '2025-09-30 22:22:29.884741', 'step': 8409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:29.923760', 'step': 8409, 'epoch': 2} {'type': 'loss', 'content': 0.1119786947965622, 'timestamp': '2025-09-30 22:22:29.937569', 'step': 8410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:29.979991', 'step': 8410, 'epoch': 2} {'type': 'loss', 'content': 0.13288451731204987, 'timestamp': '2025-09-30 22:22:29.984787', 'step': 8411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:30.018759', 'step': 8411, 'epoch': 2} {'type': 'loss', 'content': 0.1284860223531723, 'timestamp': '2025-09-30 22:22:30.044522', 'step': 8412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.076393', 'step': 8412, 'epoch': 2} {'type': 'loss', 'content': 0.11829035729169846, 'timestamp': '2025-09-30 22:22:30.079677', 'step': 8413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:30.116790', 'step': 8413, 'epoch': 2} {'type': 'loss', 'content': 0.05579974874854088, 'timestamp': '2025-09-30 22:22:30.130843', 'step': 8414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.163468', 'step': 8414, 'epoch': 2} {'type': 'loss', 'content': 0.12976792454719543, 'timestamp': '2025-09-30 22:22:30.177943', 'step': 8415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.211624', 'step': 8415, 'epoch': 2} {'type': 'loss', 'content': 0.11938964575529099, 'timestamp': '2025-09-30 22:22:30.244389', 'step': 8416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.276108', 'step': 8416, 'epoch': 2} {'type': 'loss', 'content': 0.16317442059516907, 'timestamp': '2025-09-30 22:22:30.278727', 'step': 8417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:30.309510', 'step': 8417, 'epoch': 2} {'type': 'loss', 'content': 0.14934691786766052, 'timestamp': '2025-09-30 22:22:30.321825', 'step': 8418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.362209', 'step': 8418, 'epoch': 2} {'type': 'loss', 'content': 0.19457906484603882, 'timestamp': '2025-09-30 22:22:30.364994', 'step': 8419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.399267', 'step': 8419, 'epoch': 2} {'type': 'loss', 'content': 0.1589238941669464, 'timestamp': '2025-09-30 22:22:30.432372', 'step': 8420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.463894', 'step': 8420, 'epoch': 2} {'type': 'loss', 'content': 0.09788127988576889, 'timestamp': '2025-09-30 22:22:30.467562', 'step': 8421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:30.499118', 'step': 8421, 'epoch': 2} {'type': 'loss', 'content': 0.09155292063951492, 'timestamp': '2025-09-30 22:22:30.509053', 'step': 8422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.547044', 'step': 8422, 'epoch': 2} {'type': 'loss', 'content': 0.17542769014835358, 'timestamp': '2025-09-30 22:22:30.555023', 'step': 8423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.588557', 'step': 8423, 'epoch': 2} {'type': 'loss', 'content': 0.1038709282875061, 'timestamp': '2025-09-30 22:22:30.620600', 'step': 8424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:30.667328', 'step': 8424, 'epoch': 2} {'type': 'loss', 'content': 0.1451600193977356, 'timestamp': '2025-09-30 22:22:30.670631', 'step': 8425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.708294', 'step': 8425, 'epoch': 2} {'type': 'loss', 'content': 0.08441348373889923, 'timestamp': '2025-09-30 22:22:30.712059', 'step': 8426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:30.744056', 'step': 8426, 'epoch': 2} {'type': 'loss', 'content': 0.09836500138044357, 'timestamp': '2025-09-30 22:22:30.755574', 'step': 8427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:30.786930', 'step': 8427, 'epoch': 2} {'type': 'loss', 'content': 0.11944491416215897, 'timestamp': '2025-09-30 22:22:30.811762', 'step': 8428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.849463', 'step': 8428, 'epoch': 2} {'type': 'loss', 'content': 0.09488619118928909, 'timestamp': '2025-09-30 22:22:30.859655', 'step': 8429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:30.897555', 'step': 8429, 'epoch': 2} {'type': 'loss', 'content': 0.15144877135753632, 'timestamp': '2025-09-30 22:22:30.908235', 'step': 8430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:30.951402', 'step': 8430, 'epoch': 2} {'type': 'loss', 'content': 0.11804933845996857, 'timestamp': '2025-09-30 22:22:30.963113', 'step': 8431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:30.995079', 'step': 8431, 'epoch': 2} {'type': 'loss', 'content': 0.2005850225687027, 'timestamp': '2025-09-30 22:22:31.024775', 'step': 8432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.062180', 'step': 8432, 'epoch': 2} {'type': 'loss', 'content': 0.08814539015293121, 'timestamp': '2025-09-30 22:22:31.072618', 'step': 8433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.114563', 'step': 8433, 'epoch': 2} {'type': 'loss', 'content': 0.07631726562976837, 'timestamp': '2025-09-30 22:22:31.136183', 'step': 8434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:31.213276', 'step': 8434, 'epoch': 2} {'type': 'loss', 'content': 0.16464842855930328, 'timestamp': '2025-09-30 22:22:31.217475', 'step': 8435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:31.249169', 'step': 8435, 'epoch': 2} {'type': 'loss', 'content': 0.14620919525623322, 'timestamp': '2025-09-30 22:22:31.273778', 'step': 8436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:31.309679', 'step': 8436, 'epoch': 2} {'type': 'loss', 'content': 0.11625999212265015, 'timestamp': '2025-09-30 22:22:31.318400', 'step': 8437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.375004', 'step': 8437, 'epoch': 2} {'type': 'loss', 'content': 0.1464928686618805, 'timestamp': '2025-09-30 22:22:31.384200', 'step': 8438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:31.421962', 'step': 8438, 'epoch': 2} {'type': 'loss', 'content': 0.15686088800430298, 'timestamp': '2025-09-30 22:22:31.430414', 'step': 8439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.468930', 'step': 8439, 'epoch': 2} {'type': 'loss', 'content': 0.1282372772693634, 'timestamp': '2025-09-30 22:22:31.493372', 'step': 8440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.524649', 'step': 8440, 'epoch': 2} {'type': 'loss', 'content': 0.0993160530924797, 'timestamp': '2025-09-30 22:22:31.531159', 'step': 8441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:31.562793', 'step': 8441, 'epoch': 2} {'type': 'loss', 'content': 0.11544863879680634, 'timestamp': '2025-09-30 22:22:31.565676', 'step': 8442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.603706', 'step': 8442, 'epoch': 2} {'type': 'loss', 'content': 0.06601973623037338, 'timestamp': '2025-09-30 22:22:31.616103', 'step': 8443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.652996', 'step': 8443, 'epoch': 2} {'type': 'loss', 'content': 0.14986741542816162, 'timestamp': '2025-09-30 22:22:31.682317', 'step': 8444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:31.715620', 'step': 8444, 'epoch': 2} {'type': 'loss', 'content': 0.14391663670539856, 'timestamp': '2025-09-30 22:22:31.719207', 'step': 8445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:31.760161', 'step': 8445, 'epoch': 2} {'type': 'loss', 'content': 0.0629289299249649, 'timestamp': '2025-09-30 22:22:31.768623', 'step': 8446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:31.804167', 'step': 8446, 'epoch': 2} {'type': 'loss', 'content': 0.08682873100042343, 'timestamp': '2025-09-30 22:22:31.806979', 'step': 8447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:31.840541', 'step': 8447, 'epoch': 2} {'type': 'loss', 'content': 0.11239513009786606, 'timestamp': '2025-09-30 22:22:31.865357', 'step': 8448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:31.896269', 'step': 8448, 'epoch': 2} {'type': 'loss', 'content': 0.1599019467830658, 'timestamp': '2025-09-30 22:22:31.899317', 'step': 8449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:31.935681', 'step': 8449, 'epoch': 2} {'type': 'loss', 'content': 0.11630979180335999, 'timestamp': '2025-09-30 22:22:31.942473', 'step': 8450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:31.995741', 'step': 8450, 'epoch': 2} {'type': 'loss', 'content': 0.061238422989845276, 'timestamp': '2025-09-30 22:22:31.999723', 'step': 8451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:32.038451', 'step': 8451, 'epoch': 2} {'type': 'loss', 'content': 0.15929321944713593, 'timestamp': '2025-09-30 22:22:32.062991', 'step': 8452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.099167', 'step': 8452, 'epoch': 2} {'type': 'loss', 'content': 0.10094713419675827, 'timestamp': '2025-09-30 22:22:32.109568', 'step': 8453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:32.147938', 'step': 8453, 'epoch': 2} {'type': 'loss', 'content': 0.16695192456245422, 'timestamp': '2025-09-30 22:22:32.156135', 'step': 8454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:32.191392', 'step': 8454, 'epoch': 2} {'type': 'loss', 'content': 0.11839097738265991, 'timestamp': '2025-09-30 22:22:32.196414', 'step': 8455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:32.230507', 'step': 8455, 'epoch': 2} {'type': 'loss', 'content': 0.14212492108345032, 'timestamp': '2025-09-30 22:22:32.259606', 'step': 8456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.291777', 'step': 8456, 'epoch': 2} {'type': 'loss', 'content': 0.10641995072364807, 'timestamp': '2025-09-30 22:22:32.299048', 'step': 8457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.333900', 'step': 8457, 'epoch': 2} {'type': 'loss', 'content': 0.14364634454250336, 'timestamp': '2025-09-30 22:22:32.338013', 'step': 8458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:32.369543', 'step': 8458, 'epoch': 2} {'type': 'loss', 'content': 0.16531629860401154, 'timestamp': '2025-09-30 22:22:32.376458', 'step': 8459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:32.418948', 'step': 8459, 'epoch': 2} {'type': 'loss', 'content': 0.0980885848402977, 'timestamp': '2025-09-30 22:22:32.448715', 'step': 8460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.484355', 'step': 8460, 'epoch': 2} {'type': 'loss', 'content': 0.10273493081331253, 'timestamp': '2025-09-30 22:22:32.493227', 'step': 8461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.525795', 'step': 8461, 'epoch': 2} {'type': 'loss', 'content': 0.1516263633966446, 'timestamp': '2025-09-30 22:22:32.536670', 'step': 8462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:32.571778', 'step': 8462, 'epoch': 2} {'type': 'loss', 'content': 0.20466063916683197, 'timestamp': '2025-09-30 22:22:32.576688', 'step': 8463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.623111', 'step': 8463, 'epoch': 2} {'type': 'loss', 'content': 0.07051938772201538, 'timestamp': '2025-09-30 22:22:32.649345', 'step': 8464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:32.680527', 'step': 8464, 'epoch': 2} {'type': 'loss', 'content': 0.10457552969455719, 'timestamp': '2025-09-30 22:22:32.689297', 'step': 8465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:32.721658', 'step': 8465, 'epoch': 2} {'type': 'loss', 'content': 0.10021030157804489, 'timestamp': '2025-09-30 22:22:32.726075', 'step': 8466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.758876', 'step': 8466, 'epoch': 2} {'type': 'loss', 'content': 0.15430352091789246, 'timestamp': '2025-09-30 22:22:32.763109', 'step': 8467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:32.794880', 'step': 8467, 'epoch': 2} {'type': 'loss', 'content': 0.08954820036888123, 'timestamp': '2025-09-30 22:22:32.825953', 'step': 8468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:32.858198', 'step': 8468, 'epoch': 2} {'type': 'loss', 'content': 0.13722330331802368, 'timestamp': '2025-09-30 22:22:32.867255', 'step': 8469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:32.904213', 'step': 8469, 'epoch': 2} {'type': 'loss', 'content': 0.0667223259806633, 'timestamp': '2025-09-30 22:22:32.909521', 'step': 8470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:32.940961', 'step': 8470, 'epoch': 2} {'type': 'loss', 'content': 0.20239463448524475, 'timestamp': '2025-09-30 22:22:32.944884', 'step': 8471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:32.975342', 'step': 8471, 'epoch': 2} {'type': 'loss', 'content': 0.1347390115261078, 'timestamp': '2025-09-30 22:22:33.001517', 'step': 8472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:33.032355', 'step': 8472, 'epoch': 2} {'type': 'loss', 'content': 0.1627877950668335, 'timestamp': '2025-09-30 22:22:33.036526', 'step': 8473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:33.069257', 'step': 8473, 'epoch': 2} {'type': 'loss', 'content': 0.10155186057090759, 'timestamp': '2025-09-30 22:22:33.076038', 'step': 8474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:33.108411', 'step': 8474, 'epoch': 2} {'type': 'loss', 'content': 0.06365866959095001, 'timestamp': '2025-09-30 22:22:33.116159', 'step': 8475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:33.147402', 'step': 8475, 'epoch': 2} {'type': 'loss', 'content': 0.1662006378173828, 'timestamp': '2025-09-30 22:22:33.171343', 'step': 8476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:33.204884', 'step': 8476, 'epoch': 2} {'type': 'loss', 'content': 0.08294471353292465, 'timestamp': '2025-09-30 22:22:33.209628', 'step': 8477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.243510', 'step': 8477, 'epoch': 2} {'type': 'loss', 'content': 0.08887745440006256, 'timestamp': '2025-09-30 22:22:33.248703', 'step': 8478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:33.282004', 'step': 8478, 'epoch': 2} {'type': 'loss', 'content': 0.06549414992332458, 'timestamp': '2025-09-30 22:22:33.286361', 'step': 8479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:33.320793', 'step': 8479, 'epoch': 2} {'type': 'loss', 'content': 0.14350707828998566, 'timestamp': '2025-09-30 22:22:33.348195', 'step': 8480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:33.381325', 'step': 8480, 'epoch': 2} {'type': 'loss', 'content': 0.09406164288520813, 'timestamp': '2025-09-30 22:22:33.386503', 'step': 8481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.421785', 'step': 8481, 'epoch': 2} {'type': 'loss', 'content': 0.22689782083034515, 'timestamp': '2025-09-30 22:22:33.427052', 'step': 8482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:33.461104', 'step': 8482, 'epoch': 2} {'type': 'loss', 'content': 0.12293467670679092, 'timestamp': '2025-09-30 22:22:33.464008', 'step': 8483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.501038', 'step': 8483, 'epoch': 2} {'type': 'loss', 'content': 0.0872502252459526, 'timestamp': '2025-09-30 22:22:33.525734', 'step': 8484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:33.562002', 'step': 8484, 'epoch': 2} {'type': 'loss', 'content': 0.08986914902925491, 'timestamp': '2025-09-30 22:22:33.564776', 'step': 8485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:33.599368', 'step': 8485, 'epoch': 2} {'type': 'loss', 'content': 0.14196942746639252, 'timestamp': '2025-09-30 22:22:33.609078', 'step': 8486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.641541', 'step': 8486, 'epoch': 2} {'type': 'loss', 'content': 0.09420682489871979, 'timestamp': '2025-09-30 22:22:33.644119', 'step': 8487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.688163', 'step': 8487, 'epoch': 2} {'type': 'loss', 'content': 0.19326093792915344, 'timestamp': '2025-09-30 22:22:33.711990', 'step': 8488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.757183', 'step': 8488, 'epoch': 2} {'type': 'loss', 'content': 0.19669702649116516, 'timestamp': '2025-09-30 22:22:33.760405', 'step': 8489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:33.790891', 'step': 8489, 'epoch': 2} {'type': 'loss', 'content': 0.06400566548109055, 'timestamp': '2025-09-30 22:22:33.797048', 'step': 8490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:33.828899', 'step': 8490, 'epoch': 2} {'type': 'loss', 'content': 0.22966298460960388, 'timestamp': '2025-09-30 22:22:33.833961', 'step': 8491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.865157', 'step': 8491, 'epoch': 2} {'type': 'loss', 'content': 0.11323629319667816, 'timestamp': '2025-09-30 22:22:33.891581', 'step': 8492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:33.925004', 'step': 8492, 'epoch': 2} {'type': 'loss', 'content': 0.22071346640586853, 'timestamp': '2025-09-30 22:22:33.936664', 'step': 8493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:33.967642', 'step': 8493, 'epoch': 2} {'type': 'loss', 'content': 0.10928760468959808, 'timestamp': '2025-09-30 22:22:33.970665', 'step': 8494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:34.006271', 'step': 8494, 'epoch': 2} {'type': 'loss', 'content': 0.19289177656173706, 'timestamp': '2025-09-30 22:22:34.013876', 'step': 8495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:34.045612', 'step': 8495, 'epoch': 2} {'type': 'loss', 'content': 0.124015212059021, 'timestamp': '2025-09-30 22:22:34.070567', 'step': 8496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:34.103952', 'step': 8496, 'epoch': 2} {'type': 'loss', 'content': 0.11862973868846893, 'timestamp': '2025-09-30 22:22:34.108116', 'step': 8497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:34.141490', 'step': 8497, 'epoch': 2} {'type': 'loss', 'content': 0.07881335914134979, 'timestamp': '2025-09-30 22:22:34.145568', 'step': 8498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:34.176210', 'step': 8498, 'epoch': 2} {'type': 'loss', 'content': 0.10480104386806488, 'timestamp': '2025-09-30 22:22:34.178559', 'step': 8499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:34.209170', 'step': 8499, 'epoch': 2} {'type': 'loss', 'content': 0.12184160947799683, 'timestamp': '2025-09-30 22:22:34.245115', 'step': 8500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 8500', 'timestamp': '2025-09-30 22:22:39.545574', 'step': 8500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:39.591951', 'step': 8500, 'epoch': 2} {'type': 'loss', 'content': 0.21258333325386047, 'timestamp': '2025-09-30 22:22:39.595034', 'step': 8501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:39.627419', 'step': 8501, 'epoch': 2} {'type': 'loss', 'content': 0.17828315496444702, 'timestamp': '2025-09-30 22:22:39.630547', 'step': 8502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:39.663196', 'step': 8502, 'epoch': 2} {'type': 'loss', 'content': 0.10536453127861023, 'timestamp': '2025-09-30 22:22:39.666785', 'step': 8503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:39.698793', 'step': 8503, 'epoch': 2} {'type': 'loss', 'content': 0.1875482201576233, 'timestamp': '2025-09-30 22:22:39.730982', 'step': 8504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:39.769712', 'step': 8504, 'epoch': 2} {'type': 'loss', 'content': 0.16228877007961273, 'timestamp': '2025-09-30 22:22:39.779990', 'step': 8505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:39.815108', 'step': 8505, 'epoch': 2} {'type': 'loss', 'content': 0.044257067143917084, 'timestamp': '2025-09-30 22:22:39.818929', 'step': 8506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:39.858084', 'step': 8506, 'epoch': 2} {'type': 'loss', 'content': 0.16881196200847626, 'timestamp': '2025-09-30 22:22:39.872142', 'step': 8507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:39.911670', 'step': 8507, 'epoch': 2} {'type': 'loss', 'content': 0.126590758562088, 'timestamp': '2025-09-30 22:22:39.937388', 'step': 8508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:39.978689', 'step': 8508, 'epoch': 2} {'type': 'loss', 'content': 0.09577898681163788, 'timestamp': '2025-09-30 22:22:39.993793', 'step': 8509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:40.026250', 'step': 8509, 'epoch': 2} {'type': 'loss', 'content': 0.07349362969398499, 'timestamp': '2025-09-30 22:22:40.038721', 'step': 8510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.076062', 'step': 8510, 'epoch': 2} {'type': 'loss', 'content': 0.09021750092506409, 'timestamp': '2025-09-30 22:22:40.082822', 'step': 8511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:40.116272', 'step': 8511, 'epoch': 2} {'type': 'loss', 'content': 0.10479102283716202, 'timestamp': '2025-09-30 22:22:40.151669', 'step': 8512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.190768', 'step': 8512, 'epoch': 2} {'type': 'loss', 'content': 0.09058788418769836, 'timestamp': '2025-09-30 22:22:40.195683', 'step': 8513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:40.233146', 'step': 8513, 'epoch': 2} {'type': 'loss', 'content': 0.08969937264919281, 'timestamp': '2025-09-30 22:22:40.242899', 'step': 8514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:40.282396', 'step': 8514, 'epoch': 2} {'type': 'loss', 'content': 0.08497579395771027, 'timestamp': '2025-09-30 22:22:40.285432', 'step': 8515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:40.325695', 'step': 8515, 'epoch': 2} {'type': 'loss', 'content': 0.1795949637889862, 'timestamp': '2025-09-30 22:22:40.354364', 'step': 8516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:40.397362', 'step': 8516, 'epoch': 2} {'type': 'loss', 'content': 0.22561973333358765, 'timestamp': '2025-09-30 22:22:40.400645', 'step': 8517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:40.437849', 'step': 8517, 'epoch': 2} {'type': 'loss', 'content': 0.06201459839940071, 'timestamp': '2025-09-30 22:22:40.440792', 'step': 8518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.484156', 'step': 8518, 'epoch': 2} {'type': 'loss', 'content': 0.1036476343870163, 'timestamp': '2025-09-30 22:22:40.487125', 'step': 8519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:40.524717', 'step': 8519, 'epoch': 2} {'type': 'loss', 'content': 0.1651490032672882, 'timestamp': '2025-09-30 22:22:40.557928', 'step': 8520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.591282', 'step': 8520, 'epoch': 2} {'type': 'loss', 'content': 0.2520850598812103, 'timestamp': '2025-09-30 22:22:40.600280', 'step': 8521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.646674', 'step': 8521, 'epoch': 2} {'type': 'loss', 'content': 0.0824412927031517, 'timestamp': '2025-09-30 22:22:40.660958', 'step': 8522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:40.699357', 'step': 8522, 'epoch': 2} {'type': 'loss', 'content': 0.12155776470899582, 'timestamp': '2025-09-30 22:22:40.702599', 'step': 8523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.743777', 'step': 8523, 'epoch': 2} {'type': 'loss', 'content': 0.13183170557022095, 'timestamp': '2025-09-30 22:22:40.774148', 'step': 8524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:40.813467', 'step': 8524, 'epoch': 2} {'type': 'loss', 'content': 0.13915060460567474, 'timestamp': '2025-09-30 22:22:40.824415', 'step': 8525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:40.871872', 'step': 8525, 'epoch': 2} {'type': 'loss', 'content': 0.15758810937404633, 'timestamp': '2025-09-30 22:22:40.874407', 'step': 8526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:40.911583', 'step': 8526, 'epoch': 2} {'type': 'loss', 'content': 0.10241589695215225, 'timestamp': '2025-09-30 22:22:40.915922', 'step': 8527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:40.953427', 'step': 8527, 'epoch': 2} {'type': 'loss', 'content': 0.13237759470939636, 'timestamp': '2025-09-30 22:22:40.978897', 'step': 8528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:41.012721', 'step': 8528, 'epoch': 2} {'type': 'loss', 'content': 0.1291123777627945, 'timestamp': '2025-09-30 22:22:41.015825', 'step': 8529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:41.050309', 'step': 8529, 'epoch': 2} {'type': 'loss', 'content': 0.13923758268356323, 'timestamp': '2025-09-30 22:22:41.054476', 'step': 8530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.096770', 'step': 8530, 'epoch': 2} {'type': 'loss', 'content': 0.12516790628433228, 'timestamp': '2025-09-30 22:22:41.100021', 'step': 8531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:41.137395', 'step': 8531, 'epoch': 2} {'type': 'loss', 'content': 0.13088342547416687, 'timestamp': '2025-09-30 22:22:41.163290', 'step': 8532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.216992', 'step': 8532, 'epoch': 2} {'type': 'loss', 'content': 0.13119550049304962, 'timestamp': '2025-09-30 22:22:41.220503', 'step': 8533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.256887', 'step': 8533, 'epoch': 2} {'type': 'loss', 'content': 0.14548452198505402, 'timestamp': '2025-09-30 22:22:41.259979', 'step': 8534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:41.304511', 'step': 8534, 'epoch': 2} {'type': 'loss', 'content': 0.11856727302074432, 'timestamp': '2025-09-30 22:22:41.314141', 'step': 8535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.348476', 'step': 8535, 'epoch': 2} {'type': 'loss', 'content': 0.09296625107526779, 'timestamp': '2025-09-30 22:22:41.372499', 'step': 8536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.414498', 'step': 8536, 'epoch': 2} {'type': 'loss', 'content': 0.17398959398269653, 'timestamp': '2025-09-30 22:22:41.418131', 'step': 8537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.466329', 'step': 8537, 'epoch': 2} {'type': 'loss', 'content': 0.08569314330816269, 'timestamp': '2025-09-30 22:22:41.475285', 'step': 8538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:41.525470', 'step': 8538, 'epoch': 2} {'type': 'loss', 'content': 0.047663092613220215, 'timestamp': '2025-09-30 22:22:41.533309', 'step': 8539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:41.576941', 'step': 8539, 'epoch': 2} {'type': 'loss', 'content': 0.22808663547039032, 'timestamp': '2025-09-30 22:22:41.602610', 'step': 8540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:41.650693', 'step': 8540, 'epoch': 2} {'type': 'loss', 'content': 0.08164830505847931, 'timestamp': '2025-09-30 22:22:41.658531', 'step': 8541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:41.704826', 'step': 8541, 'epoch': 2} {'type': 'loss', 'content': 0.03562328591942787, 'timestamp': '2025-09-30 22:22:41.708736', 'step': 8542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:41.740709', 'step': 8542, 'epoch': 2} {'type': 'loss', 'content': 0.20053894817829132, 'timestamp': '2025-09-30 22:22:41.744558', 'step': 8543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:41.784833', 'step': 8543, 'epoch': 2} {'type': 'loss', 'content': 0.061902787536382675, 'timestamp': '2025-09-30 22:22:41.820214', 'step': 8544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.852507', 'step': 8544, 'epoch': 2} {'type': 'loss', 'content': 0.12945672869682312, 'timestamp': '2025-09-30 22:22:41.857828', 'step': 8545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:41.907563', 'step': 8545, 'epoch': 2} {'type': 'loss', 'content': 0.08651909232139587, 'timestamp': '2025-09-30 22:22:41.924375', 'step': 8546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:41.958717', 'step': 8546, 'epoch': 2} {'type': 'loss', 'content': 0.12926092743873596, 'timestamp': '2025-09-30 22:22:41.963616', 'step': 8547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:41.995929', 'step': 8547, 'epoch': 2} {'type': 'loss', 'content': 0.1121860072016716, 'timestamp': '2025-09-30 22:22:42.036263', 'step': 8548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:42.083259', 'step': 8548, 'epoch': 2} {'type': 'loss', 'content': 0.11631283164024353, 'timestamp': '2025-09-30 22:22:42.088564', 'step': 8549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:42.120953', 'step': 8549, 'epoch': 2} {'type': 'loss', 'content': 0.11832768470048904, 'timestamp': '2025-09-30 22:22:42.141198', 'step': 8550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:42.173047', 'step': 8550, 'epoch': 2} {'type': 'loss', 'content': 0.07993977516889572, 'timestamp': '2025-09-30 22:22:42.179148', 'step': 8551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:42.212503', 'step': 8551, 'epoch': 2} {'type': 'loss', 'content': 0.08930612355470657, 'timestamp': '2025-09-30 22:22:42.250279', 'step': 8552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:42.281561', 'step': 8552, 'epoch': 2} {'type': 'loss', 'content': 0.14646677672863007, 'timestamp': '2025-09-30 22:22:42.287872', 'step': 8553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:42.330210', 'step': 8553, 'epoch': 2} {'type': 'loss', 'content': 0.11219724267721176, 'timestamp': '2025-09-30 22:22:42.334451', 'step': 8554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:42.366314', 'step': 8554, 'epoch': 2} {'type': 'loss', 'content': 0.1554146260023117, 'timestamp': '2025-09-30 22:22:42.370235', 'step': 8555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:42.405816', 'step': 8555, 'epoch': 2} {'type': 'loss', 'content': 0.0864458754658699, 'timestamp': '2025-09-30 22:22:42.430198', 'step': 8556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:42.462096', 'step': 8556, 'epoch': 2} {'type': 'loss', 'content': 0.13491787016391754, 'timestamp': '2025-09-30 22:22:42.471730', 'step': 8557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:42.512540', 'step': 8557, 'epoch': 2} {'type': 'loss', 'content': 0.1528884470462799, 'timestamp': '2025-09-30 22:22:42.515939', 'step': 8558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:42.548695', 'step': 8558, 'epoch': 2} {'type': 'loss', 'content': 0.13152538239955902, 'timestamp': '2025-09-30 22:22:42.559588', 'step': 8559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:42.599175', 'step': 8559, 'epoch': 2} {'type': 'loss', 'content': 0.10320155322551727, 'timestamp': '2025-09-30 22:22:42.624451', 'step': 8560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:42.657191', 'step': 8560, 'epoch': 2} {'type': 'loss', 'content': 0.07034442573785782, 'timestamp': '2025-09-30 22:22:42.660578', 'step': 8561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:42.694290', 'step': 8561, 'epoch': 2} {'type': 'loss', 'content': 0.08474023640155792, 'timestamp': '2025-09-30 22:22:42.699510', 'step': 8562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:42.733583', 'step': 8562, 'epoch': 2} {'type': 'loss', 'content': 0.14729192852973938, 'timestamp': '2025-09-30 22:22:42.736426', 'step': 8563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:42.767206', 'step': 8563, 'epoch': 2} {'type': 'loss', 'content': 0.09661585837602615, 'timestamp': '2025-09-30 22:22:42.794433', 'step': 8564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:42.827630', 'step': 8564, 'epoch': 2} {'type': 'loss', 'content': 0.11292463541030884, 'timestamp': '2025-09-30 22:22:42.830789', 'step': 8565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:42.860647', 'step': 8565, 'epoch': 2} {'type': 'loss', 'content': 0.12151926755905151, 'timestamp': '2025-09-30 22:22:42.865133', 'step': 8566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:42.897589', 'step': 8566, 'epoch': 2} {'type': 'loss', 'content': 0.13561291992664337, 'timestamp': '2025-09-30 22:22:42.908457', 'step': 8567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:42.942476', 'step': 8567, 'epoch': 2} {'type': 'loss', 'content': 0.16853927075862885, 'timestamp': '2025-09-30 22:22:42.974612', 'step': 8568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.008602', 'step': 8568, 'epoch': 2} {'type': 'loss', 'content': 0.09324558079242706, 'timestamp': '2025-09-30 22:22:43.011398', 'step': 8569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:43.046242', 'step': 8569, 'epoch': 2} {'type': 'loss', 'content': 0.1193685457110405, 'timestamp': '2025-09-30 22:22:43.050036', 'step': 8570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:43.080797', 'step': 8570, 'epoch': 2} {'type': 'loss', 'content': 0.11686433106660843, 'timestamp': '2025-09-30 22:22:43.084961', 'step': 8571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:43.118426', 'step': 8571, 'epoch': 2} {'type': 'loss', 'content': 0.12343678623437881, 'timestamp': '2025-09-30 22:22:43.145546', 'step': 8572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:43.179143', 'step': 8572, 'epoch': 2} {'type': 'loss', 'content': 0.15741513669490814, 'timestamp': '2025-09-30 22:22:43.182234', 'step': 8573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:43.215357', 'step': 8573, 'epoch': 2} {'type': 'loss', 'content': 0.13353784382343292, 'timestamp': '2025-09-30 22:22:43.223085', 'step': 8574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:43.260130', 'step': 8574, 'epoch': 2} {'type': 'loss', 'content': 0.07672818005084991, 'timestamp': '2025-09-30 22:22:43.269225', 'step': 8575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.300766', 'step': 8575, 'epoch': 2} {'type': 'loss', 'content': 0.08814828097820282, 'timestamp': '2025-09-30 22:22:43.326571', 'step': 8576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.357361', 'step': 8576, 'epoch': 2} {'type': 'loss', 'content': 0.07461651414632797, 'timestamp': '2025-09-30 22:22:43.362564', 'step': 8577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:43.401137', 'step': 8577, 'epoch': 2} {'type': 'loss', 'content': 0.19856630265712738, 'timestamp': '2025-09-30 22:22:43.406625', 'step': 8578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.442199', 'step': 8578, 'epoch': 2} {'type': 'loss', 'content': 0.16582751274108887, 'timestamp': '2025-09-30 22:22:43.446515', 'step': 8579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:43.477937', 'step': 8579, 'epoch': 2} {'type': 'loss', 'content': 0.08629909157752991, 'timestamp': '2025-09-30 22:22:43.503508', 'step': 8580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:43.543961', 'step': 8580, 'epoch': 2} {'type': 'loss', 'content': 0.2008187472820282, 'timestamp': '2025-09-30 22:22:43.547937', 'step': 8581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.580058', 'step': 8581, 'epoch': 2} {'type': 'loss', 'content': 0.15126477181911469, 'timestamp': '2025-09-30 22:22:43.585646', 'step': 8582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.631118', 'step': 8582, 'epoch': 2} {'type': 'loss', 'content': 0.106790691614151, 'timestamp': '2025-09-30 22:22:43.635460', 'step': 8583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:43.682101', 'step': 8583, 'epoch': 2} {'type': 'loss', 'content': 0.18335938453674316, 'timestamp': '2025-09-30 22:22:43.708249', 'step': 8584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:43.739733', 'step': 8584, 'epoch': 2} {'type': 'loss', 'content': 0.10225916653871536, 'timestamp': '2025-09-30 22:22:43.744451', 'step': 8585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:43.788515', 'step': 8585, 'epoch': 2} {'type': 'loss', 'content': 0.13297785818576813, 'timestamp': '2025-09-30 22:22:43.794941', 'step': 8586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:43.827296', 'step': 8586, 'epoch': 2} {'type': 'loss', 'content': 0.12382085621356964, 'timestamp': '2025-09-30 22:22:43.831120', 'step': 8587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:43.864556', 'step': 8587, 'epoch': 2} {'type': 'loss', 'content': 0.13331754505634308, 'timestamp': '2025-09-30 22:22:43.891619', 'step': 8588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:43.936481', 'step': 8588, 'epoch': 2} {'type': 'loss', 'content': 0.10276609659194946, 'timestamp': '2025-09-30 22:22:43.954532', 'step': 8589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:44.000094', 'step': 8589, 'epoch': 2} {'type': 'loss', 'content': 0.12921230494976044, 'timestamp': '2025-09-30 22:22:44.015292', 'step': 8590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:44.048443', 'step': 8590, 'epoch': 2} {'type': 'loss', 'content': 0.18395529687404633, 'timestamp': '2025-09-30 22:22:44.052217', 'step': 8591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:44.093909', 'step': 8591, 'epoch': 2} {'type': 'loss', 'content': 0.10778199136257172, 'timestamp': '2025-09-30 22:22:44.119440', 'step': 8592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:44.151199', 'step': 8592, 'epoch': 2} {'type': 'loss', 'content': 0.18222767114639282, 'timestamp': '2025-09-30 22:22:44.161782', 'step': 8593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:44.194680', 'step': 8593, 'epoch': 2} {'type': 'loss', 'content': 0.07095649838447571, 'timestamp': '2025-09-30 22:22:44.206017', 'step': 8594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:44.243846', 'step': 8594, 'epoch': 2} {'type': 'loss', 'content': 0.11634548008441925, 'timestamp': '2025-09-30 22:22:44.253944', 'step': 8595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:44.286134', 'step': 8595, 'epoch': 2} {'type': 'loss', 'content': 0.20298929512500763, 'timestamp': '2025-09-30 22:22:44.316645', 'step': 8596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:44.349690', 'step': 8596, 'epoch': 2} {'type': 'loss', 'content': 0.08060257136821747, 'timestamp': '2025-09-30 22:22:44.357984', 'step': 8597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:44.396954', 'step': 8597, 'epoch': 2} {'type': 'loss', 'content': 0.13950826227664948, 'timestamp': '2025-09-30 22:22:44.405934', 'step': 8598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:44.438062', 'step': 8598, 'epoch': 2} {'type': 'loss', 'content': 0.12224481999874115, 'timestamp': '2025-09-30 22:22:44.440837', 'step': 8599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:44.477862', 'step': 8599, 'epoch': 2} {'type': 'loss', 'content': 0.044821638613939285, 'timestamp': '2025-09-30 22:22:44.508164', 'step': 8600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:44.542270', 'step': 8600, 'epoch': 2} {'type': 'loss', 'content': 0.09228341281414032, 'timestamp': '2025-09-30 22:22:44.549822', 'step': 8601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:44.581466', 'step': 8601, 'epoch': 2} {'type': 'loss', 'content': 0.11073340475559235, 'timestamp': '2025-09-30 22:22:44.592272', 'step': 8602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:44.623741', 'step': 8602, 'epoch': 2} {'type': 'loss', 'content': 0.09398669004440308, 'timestamp': '2025-09-30 22:22:44.626491', 'step': 8603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:44.663727', 'step': 8603, 'epoch': 2} {'type': 'loss', 'content': 0.16543026268482208, 'timestamp': '2025-09-30 22:22:44.688666', 'step': 8604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:44.721492', 'step': 8604, 'epoch': 2} {'type': 'loss', 'content': 0.13738764822483063, 'timestamp': '2025-09-30 22:22:44.727791', 'step': 8605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:44.766604', 'step': 8605, 'epoch': 2} {'type': 'loss', 'content': 0.08877559751272202, 'timestamp': '2025-09-30 22:22:44.778340', 'step': 8606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:44.820254', 'step': 8606, 'epoch': 2} {'type': 'loss', 'content': 0.09329596161842346, 'timestamp': '2025-09-30 22:22:44.824315', 'step': 8607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:44.857659', 'step': 8607, 'epoch': 2} {'type': 'loss', 'content': 0.09561459720134735, 'timestamp': '2025-09-30 22:22:44.882243', 'step': 8608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:44.923923', 'step': 8608, 'epoch': 2} {'type': 'loss', 'content': 0.1954667568206787, 'timestamp': '2025-09-30 22:22:44.926890', 'step': 8609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:44.958583', 'step': 8609, 'epoch': 2} {'type': 'loss', 'content': 0.09484177082777023, 'timestamp': '2025-09-30 22:22:44.972522', 'step': 8610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:45.004568', 'step': 8610, 'epoch': 2} {'type': 'loss', 'content': 0.08277702331542969, 'timestamp': '2025-09-30 22:22:45.013439', 'step': 8611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:45.044462', 'step': 8611, 'epoch': 2} {'type': 'loss', 'content': 0.10028885304927826, 'timestamp': '2025-09-30 22:22:45.083283', 'step': 8612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:45.122455', 'step': 8612, 'epoch': 2} {'type': 'loss', 'content': 0.10435424000024796, 'timestamp': '2025-09-30 22:22:45.131824', 'step': 8613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:45.164516', 'step': 8613, 'epoch': 2} {'type': 'loss', 'content': 0.09735647588968277, 'timestamp': '2025-09-30 22:22:45.174812', 'step': 8614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:45.211224', 'step': 8614, 'epoch': 2} {'type': 'loss', 'content': 0.09688286483287811, 'timestamp': '2025-09-30 22:22:45.221293', 'step': 8615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:22:45.257838', 'step': 8615, 'epoch': 2} {'type': 'loss', 'content': 0.16686567664146423, 'timestamp': '2025-09-30 22:22:45.288845', 'step': 8616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:45.330730', 'step': 8616, 'epoch': 2} {'type': 'loss', 'content': 0.14998465776443481, 'timestamp': '2025-09-30 22:22:45.334656', 'step': 8617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:45.365532', 'step': 8617, 'epoch': 2} {'type': 'loss', 'content': 0.15475118160247803, 'timestamp': '2025-09-30 22:22:45.377326', 'step': 8618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:45.419325', 'step': 8618, 'epoch': 2} {'type': 'loss', 'content': 0.05988108739256859, 'timestamp': '2025-09-30 22:22:45.432582', 'step': 8619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:45.472350', 'step': 8619, 'epoch': 2} {'type': 'loss', 'content': 0.11417437344789505, 'timestamp': '2025-09-30 22:22:45.504076', 'step': 8620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:45.534070', 'step': 8620, 'epoch': 2} {'type': 'loss', 'content': 0.12108098715543747, 'timestamp': '2025-09-30 22:22:45.544789', 'step': 8621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:45.582464', 'step': 8621, 'epoch': 2} {'type': 'loss', 'content': 0.1714179664850235, 'timestamp': '2025-09-30 22:22:45.591259', 'step': 8622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:45.629535', 'step': 8622, 'epoch': 2} {'type': 'loss', 'content': 0.0829865112900734, 'timestamp': '2025-09-30 22:22:45.644153', 'step': 8623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:45.688399', 'step': 8623, 'epoch': 2} {'type': 'loss', 'content': 0.0689874067902565, 'timestamp': '2025-09-30 22:22:45.719595', 'step': 8624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:45.759164', 'step': 8624, 'epoch': 2} {'type': 'loss', 'content': 0.10821232199668884, 'timestamp': '2025-09-30 22:22:45.769504', 'step': 8625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:45.802145', 'step': 8625, 'epoch': 2} {'type': 'loss', 'content': 0.09905420243740082, 'timestamp': '2025-09-30 22:22:45.813721', 'step': 8626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:45.853620', 'step': 8626, 'epoch': 2} {'type': 'loss', 'content': 0.14972451329231262, 'timestamp': '2025-09-30 22:22:45.864397', 'step': 8627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:45.904169', 'step': 8627, 'epoch': 2} {'type': 'loss', 'content': 0.06269753724336624, 'timestamp': '2025-09-30 22:22:45.930385', 'step': 8628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:45.962389', 'step': 8628, 'epoch': 2} {'type': 'loss', 'content': 0.08155930787324905, 'timestamp': '2025-09-30 22:22:45.965928', 'step': 8629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.005171', 'step': 8629, 'epoch': 2} {'type': 'loss', 'content': 0.14765849709510803, 'timestamp': '2025-09-30 22:22:46.008781', 'step': 8630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:46.041102', 'step': 8630, 'epoch': 2} {'type': 'loss', 'content': 0.11713229864835739, 'timestamp': '2025-09-30 22:22:46.044047', 'step': 8631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:46.097009', 'step': 8631, 'epoch': 2} {'type': 'loss', 'content': 0.13434579968452454, 'timestamp': '2025-09-30 22:22:46.123000', 'step': 8632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:46.154361', 'step': 8632, 'epoch': 2} {'type': 'loss', 'content': 0.10299835354089737, 'timestamp': '2025-09-30 22:22:46.168231', 'step': 8633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:46.201334', 'step': 8633, 'epoch': 2} {'type': 'loss', 'content': 0.14011085033416748, 'timestamp': '2025-09-30 22:22:46.205708', 'step': 8634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.237597', 'step': 8634, 'epoch': 2} {'type': 'loss', 'content': 0.10936105996370316, 'timestamp': '2025-09-30 22:22:46.240651', 'step': 8635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:46.283417', 'step': 8635, 'epoch': 2} {'type': 'loss', 'content': 0.17162549495697021, 'timestamp': '2025-09-30 22:22:46.309456', 'step': 8636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:46.341625', 'step': 8636, 'epoch': 2} {'type': 'loss', 'content': 0.11267218738794327, 'timestamp': '2025-09-30 22:22:46.345813', 'step': 8637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:46.389608', 'step': 8637, 'epoch': 2} {'type': 'loss', 'content': 0.1365513801574707, 'timestamp': '2025-09-30 22:22:46.403191', 'step': 8638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.447877', 'step': 8638, 'epoch': 2} {'type': 'loss', 'content': 0.11833007633686066, 'timestamp': '2025-09-30 22:22:46.458985', 'step': 8639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.490822', 'step': 8639, 'epoch': 2} {'type': 'loss', 'content': 0.17134785652160645, 'timestamp': '2025-09-30 22:22:46.520033', 'step': 8640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:46.563034', 'step': 8640, 'epoch': 2} {'type': 'loss', 'content': 0.11257888376712799, 'timestamp': '2025-09-30 22:22:46.574899', 'step': 8641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:46.616598', 'step': 8641, 'epoch': 2} {'type': 'loss', 'content': 0.12416745722293854, 'timestamp': '2025-09-30 22:22:46.627281', 'step': 8642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.667623', 'step': 8642, 'epoch': 2} {'type': 'loss', 'content': 0.16042104363441467, 'timestamp': '2025-09-30 22:22:46.681264', 'step': 8643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:46.713533', 'step': 8643, 'epoch': 2} {'type': 'loss', 'content': 0.07889626920223236, 'timestamp': '2025-09-30 22:22:46.738557', 'step': 8644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:46.782217', 'step': 8644, 'epoch': 2} {'type': 'loss', 'content': 0.03893479332327843, 'timestamp': '2025-09-30 22:22:46.787232', 'step': 8645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.820358', 'step': 8645, 'epoch': 2} {'type': 'loss', 'content': 0.1212461069226265, 'timestamp': '2025-09-30 22:22:46.824407', 'step': 8646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:46.856666', 'step': 8646, 'epoch': 2} {'type': 'loss', 'content': 0.1749807447195053, 'timestamp': '2025-09-30 22:22:46.868386', 'step': 8647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.906848', 'step': 8647, 'epoch': 2} {'type': 'loss', 'content': 0.15895693004131317, 'timestamp': '2025-09-30 22:22:46.935483', 'step': 8648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:46.966089', 'step': 8648, 'epoch': 2} {'type': 'loss', 'content': 0.09855102002620697, 'timestamp': '2025-09-30 22:22:46.977502', 'step': 8649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:47.015157', 'step': 8649, 'epoch': 2} {'type': 'loss', 'content': 0.09736590832471848, 'timestamp': '2025-09-30 22:22:47.031978', 'step': 8650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:47.067017', 'step': 8650, 'epoch': 2} {'type': 'loss', 'content': 0.16633661091327667, 'timestamp': '2025-09-30 22:22:47.071487', 'step': 8651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:47.117357', 'step': 8651, 'epoch': 2} {'type': 'loss', 'content': 0.06179910525679588, 'timestamp': '2025-09-30 22:22:47.144374', 'step': 8652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.186365', 'step': 8652, 'epoch': 2} {'type': 'loss', 'content': 0.12696462869644165, 'timestamp': '2025-09-30 22:22:47.191335', 'step': 8653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.222572', 'step': 8653, 'epoch': 2} {'type': 'loss', 'content': 0.13986191153526306, 'timestamp': '2025-09-30 22:22:47.226243', 'step': 8654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.265932', 'step': 8654, 'epoch': 2} {'type': 'loss', 'content': 0.13302432000637054, 'timestamp': '2025-09-30 22:22:47.273354', 'step': 8655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.306300', 'step': 8655, 'epoch': 2} {'type': 'loss', 'content': 0.11475202441215515, 'timestamp': '2025-09-30 22:22:47.339453', 'step': 8656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.378165', 'step': 8656, 'epoch': 2} {'type': 'loss', 'content': 0.12379298359155655, 'timestamp': '2025-09-30 22:22:47.388534', 'step': 8657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.425368', 'step': 8657, 'epoch': 2} {'type': 'loss', 'content': 0.12426136434078217, 'timestamp': '2025-09-30 22:22:47.429346', 'step': 8658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.462157', 'step': 8658, 'epoch': 2} {'type': 'loss', 'content': 0.06786634773015976, 'timestamp': '2025-09-30 22:22:47.472403', 'step': 8659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:47.505008', 'step': 8659, 'epoch': 2} {'type': 'loss', 'content': 0.09064916521310806, 'timestamp': '2025-09-30 22:22:47.536449', 'step': 8660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.568299', 'step': 8660, 'epoch': 2} {'type': 'loss', 'content': 0.03583712875843048, 'timestamp': '2025-09-30 22:22:47.573823', 'step': 8661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:47.618265', 'step': 8661, 'epoch': 2} {'type': 'loss', 'content': 0.07861769944429398, 'timestamp': '2025-09-30 22:22:47.631305', 'step': 8662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:47.663315', 'step': 8662, 'epoch': 2} {'type': 'loss', 'content': 0.10366416722536087, 'timestamp': '2025-09-30 22:22:47.672629', 'step': 8663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:47.712092', 'step': 8663, 'epoch': 2} {'type': 'loss', 'content': 0.08774756640195847, 'timestamp': '2025-09-30 22:22:47.736904', 'step': 8664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.775506', 'step': 8664, 'epoch': 2} {'type': 'loss', 'content': 0.1140775978565216, 'timestamp': '2025-09-30 22:22:47.779752', 'step': 8665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:47.811476', 'step': 8665, 'epoch': 2} {'type': 'loss', 'content': 0.1897531896829605, 'timestamp': '2025-09-30 22:22:47.824547', 'step': 8666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.864780', 'step': 8666, 'epoch': 2} {'type': 'loss', 'content': 0.0956781655550003, 'timestamp': '2025-09-30 22:22:47.870057', 'step': 8667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:47.914167', 'step': 8667, 'epoch': 2} {'type': 'loss', 'content': 0.07750030606985092, 'timestamp': '2025-09-30 22:22:47.939242', 'step': 8668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:47.972052', 'step': 8668, 'epoch': 2} {'type': 'loss', 'content': 0.07822777330875397, 'timestamp': '2025-09-30 22:22:47.986263', 'step': 8669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:48.022284', 'step': 8669, 'epoch': 2} {'type': 'loss', 'content': 0.20516593754291534, 'timestamp': '2025-09-30 22:22:48.026463', 'step': 8670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.059979', 'step': 8670, 'epoch': 2} {'type': 'loss', 'content': 0.09450937062501907, 'timestamp': '2025-09-30 22:22:48.065338', 'step': 8671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:22:48.098583', 'step': 8671, 'epoch': 2} {'type': 'loss', 'content': 0.15384097397327423, 'timestamp': '2025-09-30 22:22:48.133368', 'step': 8672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:48.165159', 'step': 8672, 'epoch': 2} {'type': 'loss', 'content': 0.08528339862823486, 'timestamp': '2025-09-30 22:22:48.177979', 'step': 8673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:48.217961', 'step': 8673, 'epoch': 2} {'type': 'loss', 'content': 0.10706410557031631, 'timestamp': '2025-09-30 22:22:48.222235', 'step': 8674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.256817', 'step': 8674, 'epoch': 2} {'type': 'loss', 'content': 0.12832379341125488, 'timestamp': '2025-09-30 22:22:48.269929', 'step': 8675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.328536', 'step': 8675, 'epoch': 2} {'type': 'loss', 'content': 0.09856045991182327, 'timestamp': '2025-09-30 22:22:48.354149', 'step': 8676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.385964', 'step': 8676, 'epoch': 2} {'type': 'loss', 'content': 0.11709640920162201, 'timestamp': '2025-09-30 22:22:48.396990', 'step': 8677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.438646', 'step': 8677, 'epoch': 2} {'type': 'loss', 'content': 0.10304170101881027, 'timestamp': '2025-09-30 22:22:48.443246', 'step': 8678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:48.476120', 'step': 8678, 'epoch': 2} {'type': 'loss', 'content': 0.1073349267244339, 'timestamp': '2025-09-30 22:22:48.480426', 'step': 8679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:48.520834', 'step': 8679, 'epoch': 2} {'type': 'loss', 'content': 0.0893021896481514, 'timestamp': '2025-09-30 22:22:48.546301', 'step': 8680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:48.586002', 'step': 8680, 'epoch': 2} {'type': 'loss', 'content': 0.16106455028057098, 'timestamp': '2025-09-30 22:22:48.589616', 'step': 8681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:48.621231', 'step': 8681, 'epoch': 2} {'type': 'loss', 'content': 0.08515411615371704, 'timestamp': '2025-09-30 22:22:48.634281', 'step': 8682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.677349', 'step': 8682, 'epoch': 2} {'type': 'loss', 'content': 0.10531917214393616, 'timestamp': '2025-09-30 22:22:48.693442', 'step': 8683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:48.737701', 'step': 8683, 'epoch': 2} {'type': 'loss', 'content': 0.08700284361839294, 'timestamp': '2025-09-30 22:22:48.773818', 'step': 8684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.819784', 'step': 8684, 'epoch': 2} {'type': 'loss', 'content': 0.10599140077829361, 'timestamp': '2025-09-30 22:22:48.824357', 'step': 8685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.856412', 'step': 8685, 'epoch': 2} {'type': 'loss', 'content': 0.22053197026252747, 'timestamp': '2025-09-30 22:22:48.859890', 'step': 8686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:48.892912', 'step': 8686, 'epoch': 2} {'type': 'loss', 'content': 0.1461147516965866, 'timestamp': '2025-09-30 22:22:48.897291', 'step': 8687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:48.929746', 'step': 8687, 'epoch': 2} {'type': 'loss', 'content': 0.1411217302083969, 'timestamp': '2025-09-30 22:22:48.955023', 'step': 8688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:48.995437', 'step': 8688, 'epoch': 2} {'type': 'loss', 'content': 0.11355730891227722, 'timestamp': '2025-09-30 22:22:49.007610', 'step': 8689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:49.047269', 'step': 8689, 'epoch': 2} {'type': 'loss', 'content': 0.1477908045053482, 'timestamp': '2025-09-30 22:22:49.057740', 'step': 8690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:49.091317', 'step': 8690, 'epoch': 2} {'type': 'loss', 'content': 0.21087941527366638, 'timestamp': '2025-09-30 22:22:49.095003', 'step': 8691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:49.129665', 'step': 8691, 'epoch': 2} {'type': 'loss', 'content': 0.137344092130661, 'timestamp': '2025-09-30 22:22:49.154273', 'step': 8692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.186336', 'step': 8692, 'epoch': 2} {'type': 'loss', 'content': 0.14946308732032776, 'timestamp': '2025-09-30 22:22:49.189467', 'step': 8693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:49.246700', 'step': 8693, 'epoch': 2} {'type': 'loss', 'content': 0.12146473675966263, 'timestamp': '2025-09-30 22:22:49.258386', 'step': 8694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.299397', 'step': 8694, 'epoch': 2} {'type': 'loss', 'content': 0.07246650755405426, 'timestamp': '2025-09-30 22:22:49.304390', 'step': 8695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.345060', 'step': 8695, 'epoch': 2} {'type': 'loss', 'content': 0.09856797754764557, 'timestamp': '2025-09-30 22:22:49.377763', 'step': 8696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:49.424367', 'step': 8696, 'epoch': 2} {'type': 'loss', 'content': 0.16591669619083405, 'timestamp': '2025-09-30 22:22:49.428974', 'step': 8697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-30 22:22:49.469287', 'step': 8697, 'epoch': 2} {'type': 'loss', 'content': 0.21080529689788818, 'timestamp': '2025-09-30 22:22:49.481457', 'step': 8698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:49.513723', 'step': 8698, 'epoch': 2} {'type': 'loss', 'content': 0.0979745164513588, 'timestamp': '2025-09-30 22:22:49.518920', 'step': 8699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.561603', 'step': 8699, 'epoch': 2} {'type': 'loss', 'content': 0.17010442912578583, 'timestamp': '2025-09-30 22:22:49.587972', 'step': 8700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:49.620550', 'step': 8700, 'epoch': 2} {'type': 'loss', 'content': 0.20372126996517181, 'timestamp': '2025-09-30 22:22:49.623749', 'step': 8701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:49.655486', 'step': 8701, 'epoch': 2} {'type': 'loss', 'content': 0.11240112781524658, 'timestamp': '2025-09-30 22:22:49.659617', 'step': 8702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:49.691638', 'step': 8702, 'epoch': 2} {'type': 'loss', 'content': 0.10192467272281647, 'timestamp': '2025-09-30 22:22:49.694808', 'step': 8703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.726830', 'step': 8703, 'epoch': 2} {'type': 'loss', 'content': 0.10496111214160919, 'timestamp': '2025-09-30 22:22:49.753289', 'step': 8704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.784434', 'step': 8704, 'epoch': 2} {'type': 'loss', 'content': 0.12232906371355057, 'timestamp': '2025-09-30 22:22:49.789100', 'step': 8705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:49.821862', 'step': 8705, 'epoch': 2} {'type': 'loss', 'content': 0.08005264401435852, 'timestamp': '2025-09-30 22:22:49.833450', 'step': 8706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:49.878624', 'step': 8706, 'epoch': 2} {'type': 'loss', 'content': 0.09952930361032486, 'timestamp': '2025-09-30 22:22:49.882248', 'step': 8707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.914001', 'step': 8707, 'epoch': 2} {'type': 'loss', 'content': 0.07979980856180191, 'timestamp': '2025-09-30 22:22:49.939900', 'step': 8708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:49.971370', 'step': 8708, 'epoch': 2} {'type': 'loss', 'content': 0.12920208275318146, 'timestamp': '2025-09-30 22:22:49.981362', 'step': 8709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.019160', 'step': 8709, 'epoch': 2} {'type': 'loss', 'content': 0.09948547929525375, 'timestamp': '2025-09-30 22:22:50.029608', 'step': 8710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:50.066442', 'step': 8710, 'epoch': 2} {'type': 'loss', 'content': 0.07763553410768509, 'timestamp': '2025-09-30 22:22:50.076846', 'step': 8711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:50.116047', 'step': 8711, 'epoch': 2} {'type': 'loss', 'content': 0.10303319245576859, 'timestamp': '2025-09-30 22:22:50.144610', 'step': 8712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.184110', 'step': 8712, 'epoch': 2} {'type': 'loss', 'content': 0.12633897364139557, 'timestamp': '2025-09-30 22:22:50.198504', 'step': 8713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.237737', 'step': 8713, 'epoch': 2} {'type': 'loss', 'content': 0.10546272993087769, 'timestamp': '2025-09-30 22:22:50.241375', 'step': 8714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.273417', 'step': 8714, 'epoch': 2} {'type': 'loss', 'content': 0.11883401870727539, 'timestamp': '2025-09-30 22:22:50.278182', 'step': 8715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:50.315214', 'step': 8715, 'epoch': 2} {'type': 'loss', 'content': 0.14444872736930847, 'timestamp': '2025-09-30 22:22:50.340433', 'step': 8716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.382545', 'step': 8716, 'epoch': 2} {'type': 'loss', 'content': 0.09829592704772949, 'timestamp': '2025-09-30 22:22:50.390681', 'step': 8717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.422747', 'step': 8717, 'epoch': 2} {'type': 'loss', 'content': 0.16495509445667267, 'timestamp': '2025-09-30 22:22:50.432929', 'step': 8718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.464910', 'step': 8718, 'epoch': 2} {'type': 'loss', 'content': 0.1687343418598175, 'timestamp': '2025-09-30 22:22:50.468603', 'step': 8719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.499744', 'step': 8719, 'epoch': 2} {'type': 'loss', 'content': 0.1492852121591568, 'timestamp': '2025-09-30 22:22:50.531980', 'step': 8720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:50.569378', 'step': 8720, 'epoch': 2} {'type': 'loss', 'content': 0.13019892573356628, 'timestamp': '2025-09-30 22:22:50.573306', 'step': 8721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:50.611039', 'step': 8721, 'epoch': 2} {'type': 'loss', 'content': 0.194572314620018, 'timestamp': '2025-09-30 22:22:50.614345', 'step': 8722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.647198', 'step': 8722, 'epoch': 2} {'type': 'loss', 'content': 0.11115662753582001, 'timestamp': '2025-09-30 22:22:50.656339', 'step': 8723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.692321', 'step': 8723, 'epoch': 2} {'type': 'loss', 'content': 0.07180635631084442, 'timestamp': '2025-09-30 22:22:50.717112', 'step': 8724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.756223', 'step': 8724, 'epoch': 2} {'type': 'loss', 'content': 0.08412561565637589, 'timestamp': '2025-09-30 22:22:50.769021', 'step': 8725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:50.801583', 'step': 8725, 'epoch': 2} {'type': 'loss', 'content': 0.1083419993519783, 'timestamp': '2025-09-30 22:22:50.815237', 'step': 8726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:50.846677', 'step': 8726, 'epoch': 2} {'type': 'loss', 'content': 0.18374423682689667, 'timestamp': '2025-09-30 22:22:50.850060', 'step': 8727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:50.892651', 'step': 8727, 'epoch': 2} {'type': 'loss', 'content': 0.03369176387786865, 'timestamp': '2025-09-30 22:22:50.919413', 'step': 8728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:50.961332', 'step': 8728, 'epoch': 2} {'type': 'loss', 'content': 0.10422977805137634, 'timestamp': '2025-09-30 22:22:50.965621', 'step': 8729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:51.006572', 'step': 8729, 'epoch': 2} {'type': 'loss', 'content': 0.13477802276611328, 'timestamp': '2025-09-30 22:22:51.020349', 'step': 8730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:51.061390', 'step': 8730, 'epoch': 2} {'type': 'loss', 'content': 0.1370077133178711, 'timestamp': '2025-09-30 22:22:51.072527', 'step': 8731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:51.110456', 'step': 8731, 'epoch': 2} {'type': 'loss', 'content': 0.08585815131664276, 'timestamp': '2025-09-30 22:22:51.135635', 'step': 8732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:51.167086', 'step': 8732, 'epoch': 2} {'type': 'loss', 'content': 0.1374361366033554, 'timestamp': '2025-09-30 22:22:51.170813', 'step': 8733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.201678', 'step': 8733, 'epoch': 2} {'type': 'loss', 'content': 0.09942201524972916, 'timestamp': '2025-09-30 22:22:51.205564', 'step': 8734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.245869', 'step': 8734, 'epoch': 2} {'type': 'loss', 'content': 0.05392160639166832, 'timestamp': '2025-09-30 22:22:51.249367', 'step': 8735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:51.302749', 'step': 8735, 'epoch': 2} {'type': 'loss', 'content': 0.1430087685585022, 'timestamp': '2025-09-30 22:22:51.326966', 'step': 8736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.358999', 'step': 8736, 'epoch': 2} {'type': 'loss', 'content': 0.16760192811489105, 'timestamp': '2025-09-30 22:22:51.361599', 'step': 8737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:51.393846', 'step': 8737, 'epoch': 2} {'type': 'loss', 'content': 0.14729590713977814, 'timestamp': '2025-09-30 22:22:51.405934', 'step': 8738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:51.448760', 'step': 8738, 'epoch': 2} {'type': 'loss', 'content': 0.1078186184167862, 'timestamp': '2025-09-30 22:22:51.452593', 'step': 8739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:51.485211', 'step': 8739, 'epoch': 2} {'type': 'loss', 'content': 0.13417574763298035, 'timestamp': '2025-09-30 22:22:51.509426', 'step': 8740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:51.541405', 'step': 8740, 'epoch': 2} {'type': 'loss', 'content': 0.106584832072258, 'timestamp': '2025-09-30 22:22:51.544859', 'step': 8741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.599645', 'step': 8741, 'epoch': 2} {'type': 'loss', 'content': 0.1033359244465828, 'timestamp': '2025-09-30 22:22:51.609478', 'step': 8742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:51.646110', 'step': 8742, 'epoch': 2} {'type': 'loss', 'content': 0.132560595870018, 'timestamp': '2025-09-30 22:22:51.657491', 'step': 8743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.714895', 'step': 8743, 'epoch': 2} {'type': 'loss', 'content': 0.1322053223848343, 'timestamp': '2025-09-30 22:22:51.740027', 'step': 8744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:51.772764', 'step': 8744, 'epoch': 2} {'type': 'loss', 'content': 0.07736919075250626, 'timestamp': '2025-09-30 22:22:51.775854', 'step': 8745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.807509', 'step': 8745, 'epoch': 2} {'type': 'loss', 'content': 0.16744405031204224, 'timestamp': '2025-09-30 22:22:51.818092', 'step': 8746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.856910', 'step': 8746, 'epoch': 2} {'type': 'loss', 'content': 0.1891767978668213, 'timestamp': '2025-09-30 22:22:51.860409', 'step': 8747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.891999', 'step': 8747, 'epoch': 2} {'type': 'loss', 'content': 0.09375739097595215, 'timestamp': '2025-09-30 22:22:51.917333', 'step': 8748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:51.958698', 'step': 8748, 'epoch': 2} {'type': 'loss', 'content': 0.13629882037639618, 'timestamp': '2025-09-30 22:22:51.969270', 'step': 8749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:52.001734', 'step': 8749, 'epoch': 2} {'type': 'loss', 'content': 0.1210130825638771, 'timestamp': '2025-09-30 22:22:52.005689', 'step': 8750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:52.036844', 'step': 8750, 'epoch': 2} {'type': 'loss', 'content': 0.1495378464460373, 'timestamp': '2025-09-30 22:22:52.052507', 'step': 8751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:52.092311', 'step': 8751, 'epoch': 2} {'type': 'loss', 'content': 0.05747494474053383, 'timestamp': '2025-09-30 22:22:52.124615', 'step': 8752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.165917', 'step': 8752, 'epoch': 2} {'type': 'loss', 'content': 0.07836468517780304, 'timestamp': '2025-09-30 22:22:52.170667', 'step': 8753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:52.207891', 'step': 8753, 'epoch': 2} {'type': 'loss', 'content': 0.09855292737483978, 'timestamp': '2025-09-30 22:22:52.211605', 'step': 8754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:52.249758', 'step': 8754, 'epoch': 2} {'type': 'loss', 'content': 0.11490002274513245, 'timestamp': '2025-09-30 22:22:52.261726', 'step': 8755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.301032', 'step': 8755, 'epoch': 2} {'type': 'loss', 'content': 0.15752100944519043, 'timestamp': '2025-09-30 22:22:52.335595', 'step': 8756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.366533', 'step': 8756, 'epoch': 2} {'type': 'loss', 'content': 0.08062653988599777, 'timestamp': '2025-09-30 22:22:52.376489', 'step': 8757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.416080', 'step': 8757, 'epoch': 2} {'type': 'loss', 'content': 0.1081705167889595, 'timestamp': '2025-09-30 22:22:52.420722', 'step': 8758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.454938', 'step': 8758, 'epoch': 2} {'type': 'loss', 'content': 0.10954781621694565, 'timestamp': '2025-09-30 22:22:52.458560', 'step': 8759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:52.491648', 'step': 8759, 'epoch': 2} {'type': 'loss', 'content': 0.07769984006881714, 'timestamp': '2025-09-30 22:22:52.516227', 'step': 8760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.553531', 'step': 8760, 'epoch': 2} {'type': 'loss', 'content': 0.21309593319892883, 'timestamp': '2025-09-30 22:22:52.564377', 'step': 8761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.602803', 'step': 8761, 'epoch': 2} {'type': 'loss', 'content': 0.13832704722881317, 'timestamp': '2025-09-30 22:22:52.613785', 'step': 8762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:52.652921', 'step': 8762, 'epoch': 2} {'type': 'loss', 'content': 0.18813104927539825, 'timestamp': '2025-09-30 22:22:52.665453', 'step': 8763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:52.702184', 'step': 8763, 'epoch': 2} {'type': 'loss', 'content': 0.11836058646440506, 'timestamp': '2025-09-30 22:22:52.734899', 'step': 8764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:52.766062', 'step': 8764, 'epoch': 2} {'type': 'loss', 'content': 0.12189964950084686, 'timestamp': '2025-09-30 22:22:52.769899', 'step': 8765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:52.811733', 'step': 8765, 'epoch': 2} {'type': 'loss', 'content': 0.22987836599349976, 'timestamp': '2025-09-30 22:22:52.822900', 'step': 8766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.860816', 'step': 8766, 'epoch': 2} {'type': 'loss', 'content': 0.13424605131149292, 'timestamp': '2025-09-30 22:22:52.865577', 'step': 8767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:52.898275', 'step': 8767, 'epoch': 2} {'type': 'loss', 'content': 0.10977105796337128, 'timestamp': '2025-09-30 22:22:52.935891', 'step': 8768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:52.967725', 'step': 8768, 'epoch': 2} {'type': 'loss', 'content': 0.0699990838766098, 'timestamp': '2025-09-30 22:22:52.971843', 'step': 8769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:53.004073', 'step': 8769, 'epoch': 2} {'type': 'loss', 'content': 0.0768868550658226, 'timestamp': '2025-09-30 22:22:53.009229', 'step': 8770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:53.042350', 'step': 8770, 'epoch': 2} {'type': 'loss', 'content': 0.10501503944396973, 'timestamp': '2025-09-30 22:22:53.054206', 'step': 8771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:53.112508', 'step': 8771, 'epoch': 2} {'type': 'loss', 'content': 0.08762206882238388, 'timestamp': '2025-09-30 22:22:53.139612', 'step': 8772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.173941', 'step': 8772, 'epoch': 2} {'type': 'loss', 'content': 0.10949578881263733, 'timestamp': '2025-09-30 22:22:53.178606', 'step': 8773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:53.213312', 'step': 8773, 'epoch': 2} {'type': 'loss', 'content': 0.10491637885570526, 'timestamp': '2025-09-30 22:22:53.225474', 'step': 8774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:53.265153', 'step': 8774, 'epoch': 2} {'type': 'loss', 'content': 0.08012353628873825, 'timestamp': '2025-09-30 22:22:53.268424', 'step': 8775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.300664', 'step': 8775, 'epoch': 2} {'type': 'loss', 'content': 0.10686448216438293, 'timestamp': '2025-09-30 22:22:53.334297', 'step': 8776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.367513', 'step': 8776, 'epoch': 2} {'type': 'loss', 'content': 0.07317636162042618, 'timestamp': '2025-09-30 22:22:53.370866', 'step': 8777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.413202', 'step': 8777, 'epoch': 2} {'type': 'loss', 'content': 0.09856189787387848, 'timestamp': '2025-09-30 22:22:53.421471', 'step': 8778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:53.454319', 'step': 8778, 'epoch': 2} {'type': 'loss', 'content': 0.13018952310085297, 'timestamp': '2025-09-30 22:22:53.459981', 'step': 8779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.492001', 'step': 8779, 'epoch': 2} {'type': 'loss', 'content': 0.04268525168299675, 'timestamp': '2025-09-30 22:22:53.527713', 'step': 8780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:53.569069', 'step': 8780, 'epoch': 2} {'type': 'loss', 'content': 0.15384826064109802, 'timestamp': '2025-09-30 22:22:53.582820', 'step': 8781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:22:53.623594', 'step': 8781, 'epoch': 2} {'type': 'loss', 'content': 0.08321874588727951, 'timestamp': '2025-09-30 22:22:53.640366', 'step': 8782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.685862', 'step': 8782, 'epoch': 2} {'type': 'loss', 'content': 0.09632539749145508, 'timestamp': '2025-09-30 22:22:53.699631', 'step': 8783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:53.740444', 'step': 8783, 'epoch': 2} {'type': 'loss', 'content': 0.10671599209308624, 'timestamp': '2025-09-30 22:22:53.772982', 'step': 8784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:53.804642', 'step': 8784, 'epoch': 2} {'type': 'loss', 'content': 0.16839076578617096, 'timestamp': '2025-09-30 22:22:53.819613', 'step': 8785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:53.864819', 'step': 8785, 'epoch': 2} {'type': 'loss', 'content': 0.06591483950614929, 'timestamp': '2025-09-30 22:22:53.876180', 'step': 8786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:53.919090', 'step': 8786, 'epoch': 2} {'type': 'loss', 'content': 0.1206575259566307, 'timestamp': '2025-09-30 22:22:53.932207', 'step': 8787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:53.974419', 'step': 8787, 'epoch': 2} {'type': 'loss', 'content': 0.09677102416753769, 'timestamp': '2025-09-30 22:22:54.000782', 'step': 8788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.036031', 'step': 8788, 'epoch': 2} {'type': 'loss', 'content': 0.10033980756998062, 'timestamp': '2025-09-30 22:22:54.041477', 'step': 8789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:54.075377', 'step': 8789, 'epoch': 2} {'type': 'loss', 'content': 0.05802693963050842, 'timestamp': '2025-09-30 22:22:54.078811', 'step': 8790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.111847', 'step': 8790, 'epoch': 2} {'type': 'loss', 'content': 0.07882623374462128, 'timestamp': '2025-09-30 22:22:54.125145', 'step': 8791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:54.165544', 'step': 8791, 'epoch': 2} {'type': 'loss', 'content': 0.21454371511936188, 'timestamp': '2025-09-30 22:22:54.198743', 'step': 8792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:54.230325', 'step': 8792, 'epoch': 2} {'type': 'loss', 'content': 0.14442884922027588, 'timestamp': '2025-09-30 22:22:54.235575', 'step': 8793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:54.280180', 'step': 8793, 'epoch': 2} {'type': 'loss', 'content': 0.11938700824975967, 'timestamp': '2025-09-30 22:22:54.285704', 'step': 8794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.318819', 'step': 8794, 'epoch': 2} {'type': 'loss', 'content': 0.1464070975780487, 'timestamp': '2025-09-30 22:22:54.322784', 'step': 8795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:54.356268', 'step': 8795, 'epoch': 2} {'type': 'loss', 'content': 0.07424917072057724, 'timestamp': '2025-09-30 22:22:54.393835', 'step': 8796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:54.425463', 'step': 8796, 'epoch': 2} {'type': 'loss', 'content': 0.06737450510263443, 'timestamp': '2025-09-30 22:22:54.439041', 'step': 8797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.471908', 'step': 8797, 'epoch': 2} {'type': 'loss', 'content': 0.04813035577535629, 'timestamp': '2025-09-30 22:22:54.476255', 'step': 8798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:54.508573', 'step': 8798, 'epoch': 2} {'type': 'loss', 'content': 0.11770815402269363, 'timestamp': '2025-09-30 22:22:54.512852', 'step': 8799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.544618', 'step': 8799, 'epoch': 2} {'type': 'loss', 'content': 0.05090852454304695, 'timestamp': '2025-09-30 22:22:54.575855', 'step': 8800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:54.609379', 'step': 8800, 'epoch': 2} {'type': 'loss', 'content': 0.13284237682819366, 'timestamp': '2025-09-30 22:22:54.614840', 'step': 8801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:54.657453', 'step': 8801, 'epoch': 2} {'type': 'loss', 'content': 0.09582919627428055, 'timestamp': '2025-09-30 22:22:54.672834', 'step': 8802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:54.704747', 'step': 8802, 'epoch': 2} {'type': 'loss', 'content': 0.058487292379140854, 'timestamp': '2025-09-30 22:22:54.709023', 'step': 8803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.741280', 'step': 8803, 'epoch': 2} {'type': 'loss', 'content': 0.07163363695144653, 'timestamp': '2025-09-30 22:22:54.766907', 'step': 8804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:54.809808', 'step': 8804, 'epoch': 2} {'type': 'loss', 'content': 0.16456584632396698, 'timestamp': '2025-09-30 22:22:54.824272', 'step': 8805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:54.857700', 'step': 8805, 'epoch': 2} {'type': 'loss', 'content': 0.1014997810125351, 'timestamp': '2025-09-30 22:22:54.872438', 'step': 8806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:54.913921', 'step': 8806, 'epoch': 2} {'type': 'loss', 'content': 0.1002269759774208, 'timestamp': '2025-09-30 22:22:54.930105', 'step': 8807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:54.972336', 'step': 8807, 'epoch': 2} {'type': 'loss', 'content': 0.18591314554214478, 'timestamp': '2025-09-30 22:22:54.998132', 'step': 8808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.031272', 'step': 8808, 'epoch': 2} {'type': 'loss', 'content': 0.18125319480895996, 'timestamp': '2025-09-30 22:22:55.041397', 'step': 8809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:55.080048', 'step': 8809, 'epoch': 2} {'type': 'loss', 'content': 0.08997831493616104, 'timestamp': '2025-09-30 22:22:55.089461', 'step': 8810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.128018', 'step': 8810, 'epoch': 2} {'type': 'loss', 'content': 0.11352477967739105, 'timestamp': '2025-09-30 22:22:55.131135', 'step': 8811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:55.169068', 'step': 8811, 'epoch': 2} {'type': 'loss', 'content': 0.060041554272174835, 'timestamp': '2025-09-30 22:22:55.193561', 'step': 8812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.225862', 'step': 8812, 'epoch': 2} {'type': 'loss', 'content': 0.12318018823862076, 'timestamp': '2025-09-30 22:22:55.243232', 'step': 8813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.275957', 'step': 8813, 'epoch': 2} {'type': 'loss', 'content': 0.10714595764875412, 'timestamp': '2025-09-30 22:22:55.286949', 'step': 8814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.327016', 'step': 8814, 'epoch': 2} {'type': 'loss', 'content': 0.18214505910873413, 'timestamp': '2025-09-30 22:22:55.341393', 'step': 8815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:55.384288', 'step': 8815, 'epoch': 2} {'type': 'loss', 'content': 0.08325430750846863, 'timestamp': '2025-09-30 22:22:55.410009', 'step': 8816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:55.441740', 'step': 8816, 'epoch': 2} {'type': 'loss', 'content': 0.14339502155780792, 'timestamp': '2025-09-30 22:22:55.454360', 'step': 8817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.495167', 'step': 8817, 'epoch': 2} {'type': 'loss', 'content': 0.19620759785175323, 'timestamp': '2025-09-30 22:22:55.506440', 'step': 8818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:55.541436', 'step': 8818, 'epoch': 2} {'type': 'loss', 'content': 0.18670439720153809, 'timestamp': '2025-09-30 22:22:55.551791', 'step': 8819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:55.590596', 'step': 8819, 'epoch': 2} {'type': 'loss', 'content': 0.17907005548477173, 'timestamp': '2025-09-30 22:22:55.615261', 'step': 8820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.646348', 'step': 8820, 'epoch': 2} {'type': 'loss', 'content': 0.09850320219993591, 'timestamp': '2025-09-30 22:22:55.655883', 'step': 8821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:55.693264', 'step': 8821, 'epoch': 2} {'type': 'loss', 'content': 0.053842514753341675, 'timestamp': '2025-09-30 22:22:55.705721', 'step': 8822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:55.738477', 'step': 8822, 'epoch': 2} {'type': 'loss', 'content': 0.10001788288354874, 'timestamp': '2025-09-30 22:22:55.748739', 'step': 8823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.787099', 'step': 8823, 'epoch': 2} {'type': 'loss', 'content': 0.15861012041568756, 'timestamp': '2025-09-30 22:22:55.819458', 'step': 8824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:55.856995', 'step': 8824, 'epoch': 2} {'type': 'loss', 'content': 0.06425876915454865, 'timestamp': '2025-09-30 22:22:55.867550', 'step': 8825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:55.906268', 'step': 8825, 'epoch': 2} {'type': 'loss', 'content': 0.09401915967464447, 'timestamp': '2025-09-30 22:22:55.919469', 'step': 8826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:55.961210', 'step': 8826, 'epoch': 2} {'type': 'loss', 'content': 0.1334947943687439, 'timestamp': '2025-09-30 22:22:55.966158', 'step': 8827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:56.006890', 'step': 8827, 'epoch': 2} {'type': 'loss', 'content': 0.05281349644064903, 'timestamp': '2025-09-30 22:22:56.040411', 'step': 8828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.080404', 'step': 8828, 'epoch': 2} {'type': 'loss', 'content': 0.12085048109292984, 'timestamp': '2025-09-30 22:22:56.090225', 'step': 8829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:56.129483', 'step': 8829, 'epoch': 2} {'type': 'loss', 'content': 0.12822365760803223, 'timestamp': '2025-09-30 22:22:56.141863', 'step': 8830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:56.182087', 'step': 8830, 'epoch': 2} {'type': 'loss', 'content': 0.1563257873058319, 'timestamp': '2025-09-30 22:22:56.194777', 'step': 8831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:56.234700', 'step': 8831, 'epoch': 2} {'type': 'loss', 'content': 0.0927874818444252, 'timestamp': '2025-09-30 22:22:56.259778', 'step': 8832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.291405', 'step': 8832, 'epoch': 2} {'type': 'loss', 'content': 0.024251388385891914, 'timestamp': '2025-09-30 22:22:56.294487', 'step': 8833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:56.326309', 'step': 8833, 'epoch': 2} {'type': 'loss', 'content': 0.17276400327682495, 'timestamp': '2025-09-30 22:22:56.339977', 'step': 8834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:56.374751', 'step': 8834, 'epoch': 2} {'type': 'loss', 'content': 0.10728371888399124, 'timestamp': '2025-09-30 22:22:56.390375', 'step': 8835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.431179', 'step': 8835, 'epoch': 2} {'type': 'loss', 'content': 0.20009300112724304, 'timestamp': '2025-09-30 22:22:56.456668', 'step': 8836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:22:56.488122', 'step': 8836, 'epoch': 2} {'type': 'loss', 'content': 0.10952354967594147, 'timestamp': '2025-09-30 22:22:56.491821', 'step': 8837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.524685', 'step': 8837, 'epoch': 2} {'type': 'loss', 'content': 0.2311614751815796, 'timestamp': '2025-09-30 22:22:56.536098', 'step': 8838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:56.576655', 'step': 8838, 'epoch': 2} {'type': 'loss', 'content': 0.12113702297210693, 'timestamp': '2025-09-30 22:22:56.589216', 'step': 8839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.621892', 'step': 8839, 'epoch': 2} {'type': 'loss', 'content': 0.08890532702207565, 'timestamp': '2025-09-30 22:22:56.648183', 'step': 8840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:56.687198', 'step': 8840, 'epoch': 2} {'type': 'loss', 'content': 0.10531559586524963, 'timestamp': '2025-09-30 22:22:56.691918', 'step': 8841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:56.730645', 'step': 8841, 'epoch': 2} {'type': 'loss', 'content': 0.07865907996892929, 'timestamp': '2025-09-30 22:22:56.742591', 'step': 8842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.780471', 'step': 8842, 'epoch': 2} {'type': 'loss', 'content': 0.1070077046751976, 'timestamp': '2025-09-30 22:22:56.792450', 'step': 8843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:56.824200', 'step': 8843, 'epoch': 2} {'type': 'loss', 'content': 0.05349795147776604, 'timestamp': '2025-09-30 22:22:56.856918', 'step': 8844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:56.889476', 'step': 8844, 'epoch': 2} {'type': 'loss', 'content': 0.14496609568595886, 'timestamp': '2025-09-30 22:22:56.894206', 'step': 8845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:56.926766', 'step': 8845, 'epoch': 2} {'type': 'loss', 'content': 0.1386614292860031, 'timestamp': '2025-09-30 22:22:56.938326', 'step': 8846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:22:56.971316', 'step': 8846, 'epoch': 2} {'type': 'loss', 'content': 0.10180680453777313, 'timestamp': '2025-09-30 22:22:56.974354', 'step': 8847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:22:57.013847', 'step': 8847, 'epoch': 2} {'type': 'loss', 'content': 0.09571491926908493, 'timestamp': '2025-09-30 22:22:57.047912', 'step': 8848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:57.078846', 'step': 8848, 'epoch': 2} {'type': 'loss', 'content': 0.17024433612823486, 'timestamp': '2025-09-30 22:22:57.089863', 'step': 8849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:57.129120', 'step': 8849, 'epoch': 2} {'type': 'loss', 'content': 0.10568967461585999, 'timestamp': '2025-09-30 22:22:57.131740', 'step': 8850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:22:57.168476', 'step': 8850, 'epoch': 2} {'type': 'loss', 'content': 0.12239333242177963, 'timestamp': '2025-09-30 22:22:57.178783', 'step': 8851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:22:57.209016', 'step': 8851, 'epoch': 2} {'type': 'loss', 'content': 0.042924363166093826, 'timestamp': '2025-09-30 22:22:57.233294', 'step': 8852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:57.269068', 'step': 8852, 'epoch': 2} {'type': 'loss', 'content': 0.11223730444908142, 'timestamp': '2025-09-30 22:22:57.278956', 'step': 8853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:22:57.310850', 'step': 8853, 'epoch': 2} {'type': 'loss', 'content': 0.11616545170545578, 'timestamp': '2025-09-30 22:22:57.319087', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:23:05.972254', 'step': 8854, 'epoch': 2} {'type': 'pplx', 'content': 11667.522983771392, 'timestamp': '2025-09-30 22:23:05.975836', 'step': 8854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.006367', 'step': 8854, 'epoch': 2} {'type': 'loss', 'content': 0.10662645846605301, 'timestamp': '2025-09-30 22:23:06.009022', 'step': 8855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:06.041141', 'step': 8855, 'epoch': 2} {'type': 'loss', 'content': 0.23629918694496155, 'timestamp': '2025-09-30 22:23:06.066333', 'step': 8856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:06.097119', 'step': 8856, 'epoch': 2} {'type': 'loss', 'content': 0.12569162249565125, 'timestamp': '2025-09-30 22:23:06.100311', 'step': 8857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:06.131999', 'step': 8857, 'epoch': 2} {'type': 'loss', 'content': 0.061097193509340286, 'timestamp': '2025-09-30 22:23:06.143059', 'step': 8858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.181849', 'step': 8858, 'epoch': 2} {'type': 'loss', 'content': 0.14497512578964233, 'timestamp': '2025-09-30 22:23:06.185756', 'step': 8859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:06.217804', 'step': 8859, 'epoch': 2} {'type': 'loss', 'content': 0.10251805931329727, 'timestamp': '2025-09-30 22:23:06.252274', 'step': 8860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:06.289623', 'step': 8860, 'epoch': 2} {'type': 'loss', 'content': 0.13229848444461823, 'timestamp': '2025-09-30 22:23:06.302501', 'step': 8861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:06.336871', 'step': 8861, 'epoch': 2} {'type': 'loss', 'content': 0.11710700392723083, 'timestamp': '2025-09-30 22:23:06.347616', 'step': 8862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:06.379369', 'step': 8862, 'epoch': 2} {'type': 'loss', 'content': 0.07400326430797577, 'timestamp': '2025-09-30 22:23:06.390232', 'step': 8863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.432115', 'step': 8863, 'epoch': 2} {'type': 'loss', 'content': 0.10951000452041626, 'timestamp': '2025-09-30 22:23:06.465860', 'step': 8864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:06.509609', 'step': 8864, 'epoch': 2} {'type': 'loss', 'content': 0.11363708972930908, 'timestamp': '2025-09-30 22:23:06.521866', 'step': 8865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:06.560416', 'step': 8865, 'epoch': 2} {'type': 'loss', 'content': 0.08454780280590057, 'timestamp': '2025-09-30 22:23:06.574279', 'step': 8866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:06.606196', 'step': 8866, 'epoch': 2} {'type': 'loss', 'content': 0.1308131217956543, 'timestamp': '2025-09-30 22:23:06.620327', 'step': 8867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:06.659735', 'step': 8867, 'epoch': 2} {'type': 'loss', 'content': 0.19044847786426544, 'timestamp': '2025-09-30 22:23:06.691345', 'step': 8868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.728422', 'step': 8868, 'epoch': 2} {'type': 'loss', 'content': 0.08199657499790192, 'timestamp': '2025-09-30 22:23:06.739696', 'step': 8869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.771104', 'step': 8869, 'epoch': 2} {'type': 'loss', 'content': 0.119191475212574, 'timestamp': '2025-09-30 22:23:06.780723', 'step': 8870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.812543', 'step': 8870, 'epoch': 2} {'type': 'loss', 'content': 0.12540045380592346, 'timestamp': '2025-09-30 22:23:06.824972', 'step': 8871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:06.862294', 'step': 8871, 'epoch': 2} {'type': 'loss', 'content': 0.07073120027780533, 'timestamp': '2025-09-30 22:23:06.886782', 'step': 8872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:06.918064', 'step': 8872, 'epoch': 2} {'type': 'loss', 'content': 0.15869058668613434, 'timestamp': '2025-09-30 22:23:06.922189', 'step': 8873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:06.961064', 'step': 8873, 'epoch': 2} {'type': 'loss', 'content': 0.09757677465677261, 'timestamp': '2025-09-30 22:23:06.972934', 'step': 8874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:07.004231', 'step': 8874, 'epoch': 2} {'type': 'loss', 'content': 0.13855919241905212, 'timestamp': '2025-09-30 22:23:07.015108', 'step': 8875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:07.052082', 'step': 8875, 'epoch': 2} {'type': 'loss', 'content': 0.0846201702952385, 'timestamp': '2025-09-30 22:23:07.078276', 'step': 8876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:07.117254', 'step': 8876, 'epoch': 2} {'type': 'loss', 'content': 0.21947799623012543, 'timestamp': '2025-09-30 22:23:07.121585', 'step': 8877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:07.153781', 'step': 8877, 'epoch': 2} {'type': 'loss', 'content': 0.14381270110607147, 'timestamp': '2025-09-30 22:23:07.163412', 'step': 8878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:07.202261', 'step': 8878, 'epoch': 2} {'type': 'loss', 'content': 0.13014712929725647, 'timestamp': '2025-09-30 22:23:07.213266', 'step': 8879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:07.251752', 'step': 8879, 'epoch': 2} {'type': 'loss', 'content': 0.2014857828617096, 'timestamp': '2025-09-30 22:23:07.286037', 'step': 8880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:07.324543', 'step': 8880, 'epoch': 2} {'type': 'loss', 'content': 0.07652775943279266, 'timestamp': '2025-09-30 22:23:07.335631', 'step': 8881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:07.377274', 'step': 8881, 'epoch': 2} {'type': 'loss', 'content': 0.16925352811813354, 'timestamp': '2025-09-30 22:23:07.386238', 'step': 8882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:07.425765', 'step': 8882, 'epoch': 2} {'type': 'loss', 'content': 0.10161615908145905, 'timestamp': '2025-09-30 22:23:07.435228', 'step': 8883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:07.467890', 'step': 8883, 'epoch': 2} {'type': 'loss', 'content': 0.14875918626785278, 'timestamp': '2025-09-30 22:23:07.499054', 'step': 8884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:07.531274', 'step': 8884, 'epoch': 2} {'type': 'loss', 'content': 0.13085806369781494, 'timestamp': '2025-09-30 22:23:07.535756', 'step': 8885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:07.574903', 'step': 8885, 'epoch': 2} {'type': 'loss', 'content': 0.2260592132806778, 'timestamp': '2025-09-30 22:23:07.579355', 'step': 8886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:07.628555', 'step': 8886, 'epoch': 2} {'type': 'loss', 'content': 0.10833381116390228, 'timestamp': '2025-09-30 22:23:07.643196', 'step': 8887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:07.682682', 'step': 8887, 'epoch': 2} {'type': 'loss', 'content': 0.09827292710542679, 'timestamp': '2025-09-30 22:23:07.708516', 'step': 8888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:07.749349', 'step': 8888, 'epoch': 2} {'type': 'loss', 'content': 0.08094053715467453, 'timestamp': '2025-09-30 22:23:07.759554', 'step': 8889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:07.804524', 'step': 8889, 'epoch': 2} {'type': 'loss', 'content': 0.14376558363437653, 'timestamp': '2025-09-30 22:23:07.817958', 'step': 8890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:07.858875', 'step': 8890, 'epoch': 2} {'type': 'loss', 'content': 0.1036449447274208, 'timestamp': '2025-09-30 22:23:07.874143', 'step': 8891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:07.914482', 'step': 8891, 'epoch': 2} {'type': 'loss', 'content': 0.15252834558486938, 'timestamp': '2025-09-30 22:23:07.940883', 'step': 8892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:07.981789', 'step': 8892, 'epoch': 2} {'type': 'loss', 'content': 0.07848964631557465, 'timestamp': '2025-09-30 22:23:07.985777', 'step': 8893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:08.024769', 'step': 8893, 'epoch': 2} {'type': 'loss', 'content': 0.16199781000614166, 'timestamp': '2025-09-30 22:23:08.034803', 'step': 8894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.066933', 'step': 8894, 'epoch': 2} {'type': 'loss', 'content': 0.06418918818235397, 'timestamp': '2025-09-30 22:23:08.082454', 'step': 8895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.114545', 'step': 8895, 'epoch': 2} {'type': 'loss', 'content': 0.0704704076051712, 'timestamp': '2025-09-30 22:23:08.140919', 'step': 8896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.177861', 'step': 8896, 'epoch': 2} {'type': 'loss', 'content': 0.12012161314487457, 'timestamp': '2025-09-30 22:23:08.181683', 'step': 8897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.222011', 'step': 8897, 'epoch': 2} {'type': 'loss', 'content': 0.10950776934623718, 'timestamp': '2025-09-30 22:23:08.230931', 'step': 8898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:08.261591', 'step': 8898, 'epoch': 2} {'type': 'loss', 'content': 0.09551166743040085, 'timestamp': '2025-09-30 22:23:08.264729', 'step': 8899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:08.302878', 'step': 8899, 'epoch': 2} {'type': 'loss', 'content': 0.12457969784736633, 'timestamp': '2025-09-30 22:23:08.333477', 'step': 8900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.364957', 'step': 8900, 'epoch': 2} {'type': 'loss', 'content': 0.08653906732797623, 'timestamp': '2025-09-30 22:23:08.367670', 'step': 8901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.404727', 'step': 8901, 'epoch': 2} {'type': 'loss', 'content': 0.17538666725158691, 'timestamp': '2025-09-30 22:23:08.414879', 'step': 8902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.447448', 'step': 8902, 'epoch': 2} {'type': 'loss', 'content': 0.1294652223587036, 'timestamp': '2025-09-30 22:23:08.456901', 'step': 8903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.492877', 'step': 8903, 'epoch': 2} {'type': 'loss', 'content': 0.1683117151260376, 'timestamp': '2025-09-30 22:23:08.518541', 'step': 8904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.548686', 'step': 8904, 'epoch': 2} {'type': 'loss', 'content': 0.261286199092865, 'timestamp': '2025-09-30 22:23:08.552373', 'step': 8905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:08.583387', 'step': 8905, 'epoch': 2} {'type': 'loss', 'content': 0.08515527099370956, 'timestamp': '2025-09-30 22:23:08.587041', 'step': 8906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:08.621574', 'step': 8906, 'epoch': 2} {'type': 'loss', 'content': 0.11233063787221909, 'timestamp': '2025-09-30 22:23:08.624234', 'step': 8907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.654245', 'step': 8907, 'epoch': 2} {'type': 'loss', 'content': 0.18061743676662445, 'timestamp': '2025-09-30 22:23:08.678794', 'step': 8908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.709060', 'step': 8908, 'epoch': 2} {'type': 'loss', 'content': 0.08659534156322479, 'timestamp': '2025-09-30 22:23:08.711314', 'step': 8909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:08.742180', 'step': 8909, 'epoch': 2} {'type': 'loss', 'content': 0.1287083476781845, 'timestamp': '2025-09-30 22:23:08.754910', 'step': 8910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.785465', 'step': 8910, 'epoch': 2} {'type': 'loss', 'content': 0.17104360461235046, 'timestamp': '2025-09-30 22:23:08.794300', 'step': 8911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.831624', 'step': 8911, 'epoch': 2} {'type': 'loss', 'content': 0.09712512046098709, 'timestamp': '2025-09-30 22:23:08.855997', 'step': 8912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.888064', 'step': 8912, 'epoch': 2} {'type': 'loss', 'content': 0.09032226353883743, 'timestamp': '2025-09-30 22:23:08.891157', 'step': 8913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.921998', 'step': 8913, 'epoch': 2} {'type': 'loss', 'content': 0.05868762731552124, 'timestamp': '2025-09-30 22:23:08.924768', 'step': 8914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:08.955223', 'step': 8914, 'epoch': 2} {'type': 'loss', 'content': 0.17548982799053192, 'timestamp': '2025-09-30 22:23:08.958773', 'step': 8915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:08.995807', 'step': 8915, 'epoch': 2} {'type': 'loss', 'content': 0.037112317979335785, 'timestamp': '2025-09-30 22:23:09.027760', 'step': 8916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.058390', 'step': 8916, 'epoch': 2} {'type': 'loss', 'content': 0.04730147123336792, 'timestamp': '2025-09-30 22:23:09.062195', 'step': 8917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.103367', 'step': 8917, 'epoch': 2} {'type': 'loss', 'content': 0.082085520029068, 'timestamp': '2025-09-30 22:23:09.107051', 'step': 8918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.143467', 'step': 8918, 'epoch': 2} {'type': 'loss', 'content': 0.09916138648986816, 'timestamp': '2025-09-30 22:23:09.146326', 'step': 8919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:09.177752', 'step': 8919, 'epoch': 2} {'type': 'loss', 'content': 0.11948616057634354, 'timestamp': '2025-09-30 22:23:09.203292', 'step': 8920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:09.245588', 'step': 8920, 'epoch': 2} {'type': 'loss', 'content': 0.10076389461755753, 'timestamp': '2025-09-30 22:23:09.257283', 'step': 8921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:09.296501', 'step': 8921, 'epoch': 2} {'type': 'loss', 'content': 0.12861545383930206, 'timestamp': '2025-09-30 22:23:09.300054', 'step': 8922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.330952', 'step': 8922, 'epoch': 2} {'type': 'loss', 'content': 0.18965305387973785, 'timestamp': '2025-09-30 22:23:09.342020', 'step': 8923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:09.380816', 'step': 8923, 'epoch': 2} {'type': 'loss', 'content': 0.13689090311527252, 'timestamp': '2025-09-30 22:23:09.408191', 'step': 8924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:09.439149', 'step': 8924, 'epoch': 2} {'type': 'loss', 'content': 0.10040304809808731, 'timestamp': '2025-09-30 22:23:09.451491', 'step': 8925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:09.492856', 'step': 8925, 'epoch': 2} {'type': 'loss', 'content': 0.07979445904493332, 'timestamp': '2025-09-30 22:23:09.497240', 'step': 8926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.530609', 'step': 8926, 'epoch': 2} {'type': 'loss', 'content': 0.06766927242279053, 'timestamp': '2025-09-30 22:23:09.540043', 'step': 8927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:09.579274', 'step': 8927, 'epoch': 2} {'type': 'loss', 'content': 0.22149090468883514, 'timestamp': '2025-09-30 22:23:09.613855', 'step': 8928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.650657', 'step': 8928, 'epoch': 2} {'type': 'loss', 'content': 0.20552192628383636, 'timestamp': '2025-09-30 22:23:09.662924', 'step': 8929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.700734', 'step': 8929, 'epoch': 2} {'type': 'loss', 'content': 0.1599975824356079, 'timestamp': '2025-09-30 22:23:09.704252', 'step': 8930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:09.741886', 'step': 8930, 'epoch': 2} {'type': 'loss', 'content': 0.08547904342412949, 'timestamp': '2025-09-30 22:23:09.746334', 'step': 8931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:09.785403', 'step': 8931, 'epoch': 2} {'type': 'loss', 'content': 0.06693724542856216, 'timestamp': '2025-09-30 22:23:09.817457', 'step': 8932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.856839', 'step': 8932, 'epoch': 2} {'type': 'loss', 'content': 0.04612857848405838, 'timestamp': '2025-09-30 22:23:09.866292', 'step': 8933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:09.897880', 'step': 8933, 'epoch': 2} {'type': 'loss', 'content': 0.12550222873687744, 'timestamp': '2025-09-30 22:23:09.908052', 'step': 8934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:09.944629', 'step': 8934, 'epoch': 2} {'type': 'loss', 'content': 0.09073779731988907, 'timestamp': '2025-09-30 22:23:09.955172', 'step': 8935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:09.994632', 'step': 8935, 'epoch': 2} {'type': 'loss', 'content': 0.05472123250365257, 'timestamp': '2025-09-30 22:23:10.026606', 'step': 8936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.059006', 'step': 8936, 'epoch': 2} {'type': 'loss', 'content': 0.18774038553237915, 'timestamp': '2025-09-30 22:23:10.072998', 'step': 8937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.111176', 'step': 8937, 'epoch': 2} {'type': 'loss', 'content': 0.03964598476886749, 'timestamp': '2025-09-30 22:23:10.122912', 'step': 8938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:10.162094', 'step': 8938, 'epoch': 2} {'type': 'loss', 'content': 0.14472641050815582, 'timestamp': '2025-09-30 22:23:10.172549', 'step': 8939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.211107', 'step': 8939, 'epoch': 2} {'type': 'loss', 'content': 0.11884504556655884, 'timestamp': '2025-09-30 22:23:10.241996', 'step': 8940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.281940', 'step': 8940, 'epoch': 2} {'type': 'loss', 'content': 0.08766677230596542, 'timestamp': '2025-09-30 22:23:10.285752', 'step': 8941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.317566', 'step': 8941, 'epoch': 2} {'type': 'loss', 'content': 0.09980487078428268, 'timestamp': '2025-09-30 22:23:10.327448', 'step': 8942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.365275', 'step': 8942, 'epoch': 2} {'type': 'loss', 'content': 0.08662798255681992, 'timestamp': '2025-09-30 22:23:10.368594', 'step': 8943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.400918', 'step': 8943, 'epoch': 2} {'type': 'loss', 'content': 0.07888207584619522, 'timestamp': '2025-09-30 22:23:10.426365', 'step': 8944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.464354', 'step': 8944, 'epoch': 2} {'type': 'loss', 'content': 0.13931551575660706, 'timestamp': '2025-09-30 22:23:10.475614', 'step': 8945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.509175', 'step': 8945, 'epoch': 2} {'type': 'loss', 'content': 0.08946579694747925, 'timestamp': '2025-09-30 22:23:10.520899', 'step': 8946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.552491', 'step': 8946, 'epoch': 2} {'type': 'loss', 'content': 0.07344268262386322, 'timestamp': '2025-09-30 22:23:10.555239', 'step': 8947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.587026', 'step': 8947, 'epoch': 2} {'type': 'loss', 'content': 0.1283608376979828, 'timestamp': '2025-09-30 22:23:10.611776', 'step': 8948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:10.648828', 'step': 8948, 'epoch': 2} {'type': 'loss', 'content': 0.1270841658115387, 'timestamp': '2025-09-30 22:23:10.658623', 'step': 8949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.704147', 'step': 8949, 'epoch': 2} {'type': 'loss', 'content': 0.1714901626110077, 'timestamp': '2025-09-30 22:23:10.713998', 'step': 8950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.751402', 'step': 8950, 'epoch': 2} {'type': 'loss', 'content': 0.09618300199508667, 'timestamp': '2025-09-30 22:23:10.754214', 'step': 8951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:10.791218', 'step': 8951, 'epoch': 2} {'type': 'loss', 'content': 0.07879123091697693, 'timestamp': '2025-09-30 22:23:10.822552', 'step': 8952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:10.860671', 'step': 8952, 'epoch': 2} {'type': 'loss', 'content': 0.10725436359643936, 'timestamp': '2025-09-30 22:23:10.869626', 'step': 8953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.907940', 'step': 8953, 'epoch': 2} {'type': 'loss', 'content': 0.07535149902105331, 'timestamp': '2025-09-30 22:23:10.917785', 'step': 8954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:10.953700', 'step': 8954, 'epoch': 2} {'type': 'loss', 'content': 0.11800689250230789, 'timestamp': '2025-09-30 22:23:10.958192', 'step': 8955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:10.991458', 'step': 8955, 'epoch': 2} {'type': 'loss', 'content': 0.23777146637439728, 'timestamp': '2025-09-30 22:23:11.017219', 'step': 8956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.050355', 'step': 8956, 'epoch': 2} {'type': 'loss', 'content': 0.132505401968956, 'timestamp': '2025-09-30 22:23:11.060735', 'step': 8957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:11.098188', 'step': 8957, 'epoch': 2} {'type': 'loss', 'content': 0.09519059956073761, 'timestamp': '2025-09-30 22:23:11.109345', 'step': 8958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:11.149978', 'step': 8958, 'epoch': 2} {'type': 'loss', 'content': 0.06088171899318695, 'timestamp': '2025-09-30 22:23:11.158225', 'step': 8959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.188801', 'step': 8959, 'epoch': 2} {'type': 'loss', 'content': 0.10090218484401703, 'timestamp': '2025-09-30 22:23:11.213998', 'step': 8960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.245360', 'step': 8960, 'epoch': 2} {'type': 'loss', 'content': 0.15967394411563873, 'timestamp': '2025-09-30 22:23:11.248498', 'step': 8961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.279125', 'step': 8961, 'epoch': 2} {'type': 'loss', 'content': 0.14255790412425995, 'timestamp': '2025-09-30 22:23:11.282122', 'step': 8962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.312376', 'step': 8962, 'epoch': 2} {'type': 'loss', 'content': 0.12368268519639969, 'timestamp': '2025-09-30 22:23:11.324417', 'step': 8963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.355287', 'step': 8963, 'epoch': 2} {'type': 'loss', 'content': 0.09113426506519318, 'timestamp': '2025-09-30 22:23:11.388191', 'step': 8964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.433252', 'step': 8964, 'epoch': 2} {'type': 'loss', 'content': 0.12571215629577637, 'timestamp': '2025-09-30 22:23:11.436244', 'step': 8965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.466789', 'step': 8965, 'epoch': 2} {'type': 'loss', 'content': 0.12484809011220932, 'timestamp': '2025-09-30 22:23:11.470022', 'step': 8966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.500044', 'step': 8966, 'epoch': 2} {'type': 'loss', 'content': 0.06003923341631889, 'timestamp': '2025-09-30 22:23:11.510289', 'step': 8967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.548495', 'step': 8967, 'epoch': 2} {'type': 'loss', 'content': 0.11520592868328094, 'timestamp': '2025-09-30 22:23:11.575519', 'step': 8968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:11.613384', 'step': 8968, 'epoch': 2} {'type': 'loss', 'content': 0.14545311033725739, 'timestamp': '2025-09-30 22:23:11.616336', 'step': 8969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:11.648794', 'step': 8969, 'epoch': 2} {'type': 'loss', 'content': 0.10533621162176132, 'timestamp': '2025-09-30 22:23:11.653377', 'step': 8970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:23:11.684521', 'step': 8970, 'epoch': 2} {'type': 'loss', 'content': 0.08541013300418854, 'timestamp': '2025-09-30 22:23:11.694985', 'step': 8971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.733792', 'step': 8971, 'epoch': 2} {'type': 'loss', 'content': 0.10790961235761642, 'timestamp': '2025-09-30 22:23:11.763991', 'step': 8972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.794891', 'step': 8972, 'epoch': 2} {'type': 'loss', 'content': 0.11197534948587418, 'timestamp': '2025-09-30 22:23:11.804984', 'step': 8973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.839494', 'step': 8973, 'epoch': 2} {'type': 'loss', 'content': 0.2090030163526535, 'timestamp': '2025-09-30 22:23:11.842805', 'step': 8974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:11.873384', 'step': 8974, 'epoch': 2} {'type': 'loss', 'content': 0.06214148923754692, 'timestamp': '2025-09-30 22:23:11.876800', 'step': 8975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.914333', 'step': 8975, 'epoch': 2} {'type': 'loss', 'content': 0.1713147908449173, 'timestamp': '2025-09-30 22:23:11.945102', 'step': 8976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:11.983634', 'step': 8976, 'epoch': 2} {'type': 'loss', 'content': 0.07540851086378098, 'timestamp': '2025-09-30 22:23:11.987712', 'step': 8977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.025286', 'step': 8977, 'epoch': 2} {'type': 'loss', 'content': 0.12375146895647049, 'timestamp': '2025-09-30 22:23:12.034866', 'step': 8978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:12.073179', 'step': 8978, 'epoch': 2} {'type': 'loss', 'content': 0.27318695187568665, 'timestamp': '2025-09-30 22:23:12.075806', 'step': 8979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.107007', 'step': 8979, 'epoch': 2} {'type': 'loss', 'content': 0.05494527891278267, 'timestamp': '2025-09-30 22:23:12.137643', 'step': 8980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.171471', 'step': 8980, 'epoch': 2} {'type': 'loss', 'content': 0.03889842703938484, 'timestamp': '2025-09-30 22:23:12.174944', 'step': 8981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:12.207616', 'step': 8981, 'epoch': 2} {'type': 'loss', 'content': 0.09666810184717178, 'timestamp': '2025-09-30 22:23:12.212424', 'step': 8982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:12.245094', 'step': 8982, 'epoch': 2} {'type': 'loss', 'content': 0.10785340517759323, 'timestamp': '2025-09-30 22:23:12.249932', 'step': 8983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:12.282200', 'step': 8983, 'epoch': 2} {'type': 'loss', 'content': 0.11348381638526917, 'timestamp': '2025-09-30 22:23:12.308553', 'step': 8984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:12.340616', 'step': 8984, 'epoch': 2} {'type': 'loss', 'content': 0.14871327579021454, 'timestamp': '2025-09-30 22:23:12.344816', 'step': 8985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:12.376614', 'step': 8985, 'epoch': 2} {'type': 'loss', 'content': 0.14441579580307007, 'timestamp': '2025-09-30 22:23:12.381804', 'step': 8986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.413938', 'step': 8986, 'epoch': 2} {'type': 'loss', 'content': 0.11704210937023163, 'timestamp': '2025-09-30 22:23:12.416396', 'step': 8987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:12.447201', 'step': 8987, 'epoch': 2} {'type': 'loss', 'content': 0.09494394809007645, 'timestamp': '2025-09-30 22:23:12.473323', 'step': 8988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.503361', 'step': 8988, 'epoch': 2} {'type': 'loss', 'content': 0.10880887508392334, 'timestamp': '2025-09-30 22:23:12.505645', 'step': 8989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:12.537961', 'step': 8989, 'epoch': 2} {'type': 'loss', 'content': 0.06934000551700592, 'timestamp': '2025-09-30 22:23:12.541076', 'step': 8990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:12.571925', 'step': 8990, 'epoch': 2} {'type': 'loss', 'content': 0.11339934170246124, 'timestamp': '2025-09-30 22:23:12.575743', 'step': 8991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:12.608231', 'step': 8991, 'epoch': 2} {'type': 'loss', 'content': 0.09801526367664337, 'timestamp': '2025-09-30 22:23:12.632035', 'step': 8992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:12.661458', 'step': 8992, 'epoch': 2} {'type': 'loss', 'content': 0.09237920492887497, 'timestamp': '2025-09-30 22:23:12.672656', 'step': 8993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:12.707979', 'step': 8993, 'epoch': 2} {'type': 'loss', 'content': 0.06975304335355759, 'timestamp': '2025-09-30 22:23:12.713190', 'step': 8994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.747965', 'step': 8994, 'epoch': 2} {'type': 'loss', 'content': 0.14053837954998016, 'timestamp': '2025-09-30 22:23:12.753355', 'step': 8995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:12.786865', 'step': 8995, 'epoch': 2} {'type': 'loss', 'content': 0.05028533935546875, 'timestamp': '2025-09-30 22:23:12.813387', 'step': 8996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:12.844259', 'step': 8996, 'epoch': 2} {'type': 'loss', 'content': 0.05334131419658661, 'timestamp': '2025-09-30 22:23:12.849093', 'step': 8997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:12.882500', 'step': 8997, 'epoch': 2} {'type': 'loss', 'content': 0.22113794088363647, 'timestamp': '2025-09-30 22:23:12.886731', 'step': 8998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:12.917839', 'step': 8998, 'epoch': 2} {'type': 'loss', 'content': 0.1095244437456131, 'timestamp': '2025-09-30 22:23:12.922801', 'step': 8999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:12.953761', 'step': 8999, 'epoch': 2} {'type': 'loss', 'content': 0.11347242444753647, 'timestamp': '2025-09-30 22:23:12.979499', 'step': 9000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9000', 'timestamp': '2025-09-30 22:23:17.983306', 'step': 9000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.025352', 'step': 9000, 'epoch': 2} {'type': 'loss', 'content': 0.16680334508419037, 'timestamp': '2025-09-30 22:23:18.028414', 'step': 9001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:18.068621', 'step': 9001, 'epoch': 2} {'type': 'loss', 'content': 0.11818496137857437, 'timestamp': '2025-09-30 22:23:18.073327', 'step': 9002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.104632', 'step': 9002, 'epoch': 2} {'type': 'loss', 'content': 0.07481417804956436, 'timestamp': '2025-09-30 22:23:18.109769', 'step': 9003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.144183', 'step': 9003, 'epoch': 2} {'type': 'loss', 'content': 0.10961494594812393, 'timestamp': '2025-09-30 22:23:18.174179', 'step': 9004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.212862', 'step': 9004, 'epoch': 2} {'type': 'loss', 'content': 0.2111346572637558, 'timestamp': '2025-09-30 22:23:18.224120', 'step': 9005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.265522', 'step': 9005, 'epoch': 2} {'type': 'loss', 'content': 0.04859119653701782, 'timestamp': '2025-09-30 22:23:18.274537', 'step': 9006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:18.312994', 'step': 9006, 'epoch': 2} {'type': 'loss', 'content': 0.08307017385959625, 'timestamp': '2025-09-30 22:23:18.323613', 'step': 9007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:18.356170', 'step': 9007, 'epoch': 2} {'type': 'loss', 'content': 0.09758871793746948, 'timestamp': '2025-09-30 22:23:18.385472', 'step': 9008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:18.415900', 'step': 9008, 'epoch': 2} {'type': 'loss', 'content': 0.13508635759353638, 'timestamp': '2025-09-30 22:23:18.420409', 'step': 9009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:18.455657', 'step': 9009, 'epoch': 2} {'type': 'loss', 'content': 0.08063184469938278, 'timestamp': '2025-09-30 22:23:18.459668', 'step': 9010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:18.490912', 'step': 9010, 'epoch': 2} {'type': 'loss', 'content': 0.09982343018054962, 'timestamp': '2025-09-30 22:23:18.495730', 'step': 9011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.529435', 'step': 9011, 'epoch': 2} {'type': 'loss', 'content': 0.030138792470097542, 'timestamp': '2025-09-30 22:23:18.556109', 'step': 9012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.590244', 'step': 9012, 'epoch': 2} {'type': 'loss', 'content': 0.1420530080795288, 'timestamp': '2025-09-30 22:23:18.594923', 'step': 9013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:18.625888', 'step': 9013, 'epoch': 2} {'type': 'loss', 'content': 0.13389192521572113, 'timestamp': '2025-09-30 22:23:18.631776', 'step': 9014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.667055', 'step': 9014, 'epoch': 2} {'type': 'loss', 'content': 0.16643786430358887, 'timestamp': '2025-09-30 22:23:18.673587', 'step': 9015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:18.707977', 'step': 9015, 'epoch': 2} {'type': 'loss', 'content': 0.14306709170341492, 'timestamp': '2025-09-30 22:23:18.733845', 'step': 9016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.769163', 'step': 9016, 'epoch': 2} {'type': 'loss', 'content': 0.08985629677772522, 'timestamp': '2025-09-30 22:23:18.774673', 'step': 9017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.809450', 'step': 9017, 'epoch': 2} {'type': 'loss', 'content': 0.05855878069996834, 'timestamp': '2025-09-30 22:23:18.815275', 'step': 9018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:18.848831', 'step': 9018, 'epoch': 2} {'type': 'loss', 'content': 0.1196223720908165, 'timestamp': '2025-09-30 22:23:18.856444', 'step': 9019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.891419', 'step': 9019, 'epoch': 2} {'type': 'loss', 'content': 0.12688316404819489, 'timestamp': '2025-09-30 22:23:18.919908', 'step': 9020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:18.950998', 'step': 9020, 'epoch': 2} {'type': 'loss', 'content': 0.0912536084651947, 'timestamp': '2025-09-30 22:23:18.954719', 'step': 9021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:18.988549', 'step': 9021, 'epoch': 2} {'type': 'loss', 'content': 0.050126541405916214, 'timestamp': '2025-09-30 22:23:19.009546', 'step': 9022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:19.040781', 'step': 9022, 'epoch': 2} {'type': 'loss', 'content': 0.0940646156668663, 'timestamp': '2025-09-30 22:23:19.043900', 'step': 9023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.074371', 'step': 9023, 'epoch': 2} {'type': 'loss', 'content': 0.07600246369838715, 'timestamp': '2025-09-30 22:23:19.098064', 'step': 9024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:19.130437', 'step': 9024, 'epoch': 2} {'type': 'loss', 'content': 0.09153476357460022, 'timestamp': '2025-09-30 22:23:19.133265', 'step': 9025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.169536', 'step': 9025, 'epoch': 2} {'type': 'loss', 'content': 0.1565658152103424, 'timestamp': '2025-09-30 22:23:19.172016', 'step': 9026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.202290', 'step': 9026, 'epoch': 2} {'type': 'loss', 'content': 0.05536683648824692, 'timestamp': '2025-09-30 22:23:19.205373', 'step': 9027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.236300', 'step': 9027, 'epoch': 2} {'type': 'loss', 'content': 0.10204070806503296, 'timestamp': '2025-09-30 22:23:19.261530', 'step': 9028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.293282', 'step': 9028, 'epoch': 2} {'type': 'loss', 'content': 0.13174378871917725, 'timestamp': '2025-09-30 22:23:19.305712', 'step': 9029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:19.337633', 'step': 9029, 'epoch': 2} {'type': 'loss', 'content': 0.1462155133485794, 'timestamp': '2025-09-30 22:23:19.340447', 'step': 9030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.370771', 'step': 9030, 'epoch': 2} {'type': 'loss', 'content': 0.10293973237276077, 'timestamp': '2025-09-30 22:23:19.374692', 'step': 9031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:19.405999', 'step': 9031, 'epoch': 2} {'type': 'loss', 'content': 0.23469491302967072, 'timestamp': '2025-09-30 22:23:19.432140', 'step': 9032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:19.463082', 'step': 9032, 'epoch': 2} {'type': 'loss', 'content': 0.08788429945707321, 'timestamp': '2025-09-30 22:23:19.466685', 'step': 9033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:23:19.497586', 'step': 9033, 'epoch': 2} {'type': 'loss', 'content': 0.07644598931074142, 'timestamp': '2025-09-30 22:23:19.502109', 'step': 9034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.532371', 'step': 9034, 'epoch': 2} {'type': 'loss', 'content': 0.13178610801696777, 'timestamp': '2025-09-30 22:23:19.535024', 'step': 9035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:19.565162', 'step': 9035, 'epoch': 2} {'type': 'loss', 'content': 0.14885744452476501, 'timestamp': '2025-09-30 22:23:19.589416', 'step': 9036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.624154', 'step': 9036, 'epoch': 2} {'type': 'loss', 'content': 0.1405741274356842, 'timestamp': '2025-09-30 22:23:19.626641', 'step': 9037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.661452', 'step': 9037, 'epoch': 2} {'type': 'loss', 'content': 0.12903420627117157, 'timestamp': '2025-09-30 22:23:19.664053', 'step': 9038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.695374', 'step': 9038, 'epoch': 2} {'type': 'loss', 'content': 0.10908439755439758, 'timestamp': '2025-09-30 22:23:19.698263', 'step': 9039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:19.731675', 'step': 9039, 'epoch': 2} {'type': 'loss', 'content': 0.10422363132238388, 'timestamp': '2025-09-30 22:23:19.756120', 'step': 9040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:19.786124', 'step': 9040, 'epoch': 2} {'type': 'loss', 'content': 0.08766524493694305, 'timestamp': '2025-09-30 22:23:19.789577', 'step': 9041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.820083', 'step': 9041, 'epoch': 2} {'type': 'loss', 'content': 0.18890218436717987, 'timestamp': '2025-09-30 22:23:19.823822', 'step': 9042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.854710', 'step': 9042, 'epoch': 2} {'type': 'loss', 'content': 0.1713571548461914, 'timestamp': '2025-09-30 22:23:19.858735', 'step': 9043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:19.889870', 'step': 9043, 'epoch': 2} {'type': 'loss', 'content': 0.07890782505273819, 'timestamp': '2025-09-30 22:23:19.914401', 'step': 9044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:19.945273', 'step': 9044, 'epoch': 2} {'type': 'loss', 'content': 0.12539014220237732, 'timestamp': '2025-09-30 22:23:19.948070', 'step': 9045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:19.979027', 'step': 9045, 'epoch': 2} {'type': 'loss', 'content': 0.11264195293188095, 'timestamp': '2025-09-30 22:23:19.982260', 'step': 9046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:20.013159', 'step': 9046, 'epoch': 2} {'type': 'loss', 'content': 0.14726315438747406, 'timestamp': '2025-09-30 22:23:20.016391', 'step': 9047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.047153', 'step': 9047, 'epoch': 2} {'type': 'loss', 'content': 0.08233778178691864, 'timestamp': '2025-09-30 22:23:20.072143', 'step': 9048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:20.104695', 'step': 9048, 'epoch': 2} {'type': 'loss', 'content': 0.10248937457799911, 'timestamp': '2025-09-30 22:23:20.108598', 'step': 9049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.140170', 'step': 9049, 'epoch': 2} {'type': 'loss', 'content': 0.16526614129543304, 'timestamp': '2025-09-30 22:23:20.143396', 'step': 9050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.174317', 'step': 9050, 'epoch': 2} {'type': 'loss', 'content': 0.1309649795293808, 'timestamp': '2025-09-30 22:23:20.177134', 'step': 9051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:20.207614', 'step': 9051, 'epoch': 2} {'type': 'loss', 'content': 0.16499388217926025, 'timestamp': '2025-09-30 22:23:20.232045', 'step': 9052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.261817', 'step': 9052, 'epoch': 2} {'type': 'loss', 'content': 0.10456926375627518, 'timestamp': '2025-09-30 22:23:20.269337', 'step': 9053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.301830', 'step': 9053, 'epoch': 2} {'type': 'loss', 'content': 0.1806151568889618, 'timestamp': '2025-09-30 22:23:20.304606', 'step': 9054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.336062', 'step': 9054, 'epoch': 2} {'type': 'loss', 'content': 0.07215356081724167, 'timestamp': '2025-09-30 22:23:20.338407', 'step': 9055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:20.367923', 'step': 9055, 'epoch': 2} {'type': 'loss', 'content': 0.12159670144319534, 'timestamp': '2025-09-30 22:23:20.393001', 'step': 9056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.423490', 'step': 9056, 'epoch': 2} {'type': 'loss', 'content': 0.1481277495622635, 'timestamp': '2025-09-30 22:23:20.436274', 'step': 9057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.466525', 'step': 9057, 'epoch': 2} {'type': 'loss', 'content': 0.07735249400138855, 'timestamp': '2025-09-30 22:23:20.470584', 'step': 9058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.501515', 'step': 9058, 'epoch': 2} {'type': 'loss', 'content': 0.16598914563655853, 'timestamp': '2025-09-30 22:23:20.503735', 'step': 9059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:20.536721', 'step': 9059, 'epoch': 2} {'type': 'loss', 'content': 0.1537788063287735, 'timestamp': '2025-09-30 22:23:20.560424', 'step': 9060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.590581', 'step': 9060, 'epoch': 2} {'type': 'loss', 'content': 0.09266795217990875, 'timestamp': '2025-09-30 22:23:20.593777', 'step': 9061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.624618', 'step': 9061, 'epoch': 2} {'type': 'loss', 'content': 0.11254394799470901, 'timestamp': '2025-09-30 22:23:20.627528', 'step': 9062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:23:20.658427', 'step': 9062, 'epoch': 2} {'type': 'loss', 'content': 0.09587584435939789, 'timestamp': '2025-09-30 22:23:20.662991', 'step': 9063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.693123', 'step': 9063, 'epoch': 2} {'type': 'loss', 'content': 0.13096050918102264, 'timestamp': '2025-09-30 22:23:20.717189', 'step': 9064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:20.753213', 'step': 9064, 'epoch': 2} {'type': 'loss', 'content': 0.08667033165693283, 'timestamp': '2025-09-30 22:23:20.756265', 'step': 9065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.786490', 'step': 9065, 'epoch': 2} {'type': 'loss', 'content': 0.10404828935861588, 'timestamp': '2025-09-30 22:23:20.790418', 'step': 9066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:20.821163', 'step': 9066, 'epoch': 2} {'type': 'loss', 'content': 0.05828205496072769, 'timestamp': '2025-09-30 22:23:20.830992', 'step': 9067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.860400', 'step': 9067, 'epoch': 2} {'type': 'loss', 'content': 0.1577342003583908, 'timestamp': '2025-09-30 22:23:20.885891', 'step': 9068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:20.915733', 'step': 9068, 'epoch': 2} {'type': 'loss', 'content': 0.11253015697002411, 'timestamp': '2025-09-30 22:23:20.918538', 'step': 9069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.949108', 'step': 9069, 'epoch': 2} {'type': 'loss', 'content': 0.09606605023145676, 'timestamp': '2025-09-30 22:23:20.952155', 'step': 9070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:20.992648', 'step': 9070, 'epoch': 2} {'type': 'loss', 'content': 0.1646418571472168, 'timestamp': '2025-09-30 22:23:20.995959', 'step': 9071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:21.031856', 'step': 9071, 'epoch': 2} {'type': 'loss', 'content': 0.10880029201507568, 'timestamp': '2025-09-30 22:23:21.062791', 'step': 9072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.093702', 'step': 9072, 'epoch': 2} {'type': 'loss', 'content': 0.09724539518356323, 'timestamp': '2025-09-30 22:23:21.096212', 'step': 9073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:21.126253', 'step': 9073, 'epoch': 2} {'type': 'loss', 'content': 0.08713489770889282, 'timestamp': '2025-09-30 22:23:21.129045', 'step': 9074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.159559', 'step': 9074, 'epoch': 2} {'type': 'loss', 'content': 0.2592533826828003, 'timestamp': '2025-09-30 22:23:21.162376', 'step': 9075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.192165', 'step': 9075, 'epoch': 2} {'type': 'loss', 'content': 0.08474768698215485, 'timestamp': '2025-09-30 22:23:21.216059', 'step': 9076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.245775', 'step': 9076, 'epoch': 2} {'type': 'loss', 'content': 0.14193809032440186, 'timestamp': '2025-09-30 22:23:21.249214', 'step': 9077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.279739', 'step': 9077, 'epoch': 2} {'type': 'loss', 'content': 0.12796199321746826, 'timestamp': '2025-09-30 22:23:21.282616', 'step': 9078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:21.313866', 'step': 9078, 'epoch': 2} {'type': 'loss', 'content': 0.07438109070062637, 'timestamp': '2025-09-30 22:23:21.316711', 'step': 9079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:21.347546', 'step': 9079, 'epoch': 2} {'type': 'loss', 'content': 0.09579911828041077, 'timestamp': '2025-09-30 22:23:21.371408', 'step': 9080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:21.402005', 'step': 9080, 'epoch': 2} {'type': 'loss', 'content': 0.17594556510448456, 'timestamp': '2025-09-30 22:23:21.404425', 'step': 9081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:21.434026', 'step': 9081, 'epoch': 2} {'type': 'loss', 'content': 0.11147411167621613, 'timestamp': '2025-09-30 22:23:21.436772', 'step': 9082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:21.470573', 'step': 9082, 'epoch': 2} {'type': 'loss', 'content': 0.1414099931716919, 'timestamp': '2025-09-30 22:23:21.473785', 'step': 9083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.503526', 'step': 9083, 'epoch': 2} {'type': 'loss', 'content': 0.15111954510211945, 'timestamp': '2025-09-30 22:23:21.528095', 'step': 9084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.558337', 'step': 9084, 'epoch': 2} {'type': 'loss', 'content': 0.07643216848373413, 'timestamp': '2025-09-30 22:23:21.561702', 'step': 9085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.597073', 'step': 9085, 'epoch': 2} {'type': 'loss', 'content': 0.09584628790616989, 'timestamp': '2025-09-30 22:23:21.600958', 'step': 9086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:21.633298', 'step': 9086, 'epoch': 2} {'type': 'loss', 'content': 0.11374740302562714, 'timestamp': '2025-09-30 22:23:21.636231', 'step': 9087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.665992', 'step': 9087, 'epoch': 2} {'type': 'loss', 'content': 0.08343217521905899, 'timestamp': '2025-09-30 22:23:21.690005', 'step': 9088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.720642', 'step': 9088, 'epoch': 2} {'type': 'loss', 'content': 0.09068150073289871, 'timestamp': '2025-09-30 22:23:21.725401', 'step': 9089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:21.755832', 'step': 9089, 'epoch': 2} {'type': 'loss', 'content': 0.16065597534179688, 'timestamp': '2025-09-30 22:23:21.758328', 'step': 9090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:21.787921', 'step': 9090, 'epoch': 2} {'type': 'loss', 'content': 0.05188523977994919, 'timestamp': '2025-09-30 22:23:21.796251', 'step': 9091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:21.833151', 'step': 9091, 'epoch': 2} {'type': 'loss', 'content': 0.10385648906230927, 'timestamp': '2025-09-30 22:23:21.858017', 'step': 9092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:21.889638', 'step': 9092, 'epoch': 2} {'type': 'loss', 'content': 0.10069137811660767, 'timestamp': '2025-09-30 22:23:21.893015', 'step': 9093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.925550', 'step': 9093, 'epoch': 2} {'type': 'loss', 'content': 0.1605749875307083, 'timestamp': '2025-09-30 22:23:21.929160', 'step': 9094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:21.964915', 'step': 9094, 'epoch': 2} {'type': 'loss', 'content': 0.10599836707115173, 'timestamp': '2025-09-30 22:23:21.967954', 'step': 9095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:21.999604', 'step': 9095, 'epoch': 2} {'type': 'loss', 'content': 0.0965876504778862, 'timestamp': '2025-09-30 22:23:22.024086', 'step': 9096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.053930', 'step': 9096, 'epoch': 2} {'type': 'loss', 'content': 0.16457125544548035, 'timestamp': '2025-09-30 22:23:22.057061', 'step': 9097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.087874', 'step': 9097, 'epoch': 2} {'type': 'loss', 'content': 0.22100573778152466, 'timestamp': '2025-09-30 22:23:22.096354', 'step': 9098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:22.132072', 'step': 9098, 'epoch': 2} {'type': 'loss', 'content': 0.19017347693443298, 'timestamp': '2025-09-30 22:23:22.134721', 'step': 9099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.164953', 'step': 9099, 'epoch': 2} {'type': 'loss', 'content': 0.12437894940376282, 'timestamp': '2025-09-30 22:23:22.189225', 'step': 9100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:22.220502', 'step': 9100, 'epoch': 2} {'type': 'loss', 'content': 0.17186173796653748, 'timestamp': '2025-09-30 22:23:22.223191', 'step': 9101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.257674', 'step': 9101, 'epoch': 2} {'type': 'loss', 'content': 0.08838284760713577, 'timestamp': '2025-09-30 22:23:22.260413', 'step': 9102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.292993', 'step': 9102, 'epoch': 2} {'type': 'loss', 'content': 0.1082572340965271, 'timestamp': '2025-09-30 22:23:22.300238', 'step': 9103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.336617', 'step': 9103, 'epoch': 2} {'type': 'loss', 'content': 0.17053531110286713, 'timestamp': '2025-09-30 22:23:22.364138', 'step': 9104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.401475', 'step': 9104, 'epoch': 2} {'type': 'loss', 'content': 0.09569737315177917, 'timestamp': '2025-09-30 22:23:22.404429', 'step': 9105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.435619', 'step': 9105, 'epoch': 2} {'type': 'loss', 'content': 0.05694522336125374, 'timestamp': '2025-09-30 22:23:22.438455', 'step': 9106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.469569', 'step': 9106, 'epoch': 2} {'type': 'loss', 'content': 0.06274571269750595, 'timestamp': '2025-09-30 22:23:22.472072', 'step': 9107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:22.504687', 'step': 9107, 'epoch': 2} {'type': 'loss', 'content': 0.10428615659475327, 'timestamp': '2025-09-30 22:23:22.528899', 'step': 9108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.558858', 'step': 9108, 'epoch': 2} {'type': 'loss', 'content': 0.11414317786693573, 'timestamp': '2025-09-30 22:23:22.561616', 'step': 9109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.593012', 'step': 9109, 'epoch': 2} {'type': 'loss', 'content': 0.1415916085243225, 'timestamp': '2025-09-30 22:23:22.596138', 'step': 9110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.626305', 'step': 9110, 'epoch': 2} {'type': 'loss', 'content': 0.13558678328990936, 'timestamp': '2025-09-30 22:23:22.630011', 'step': 9111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.660678', 'step': 9111, 'epoch': 2} {'type': 'loss', 'content': 0.06201653555035591, 'timestamp': '2025-09-30 22:23:22.684451', 'step': 9112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.714042', 'step': 9112, 'epoch': 2} {'type': 'loss', 'content': 0.15303143858909607, 'timestamp': '2025-09-30 22:23:22.716576', 'step': 9113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.746851', 'step': 9113, 'epoch': 2} {'type': 'loss', 'content': 0.0765906497836113, 'timestamp': '2025-09-30 22:23:22.749239', 'step': 9114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.779893', 'step': 9114, 'epoch': 2} {'type': 'loss', 'content': 0.1560361087322235, 'timestamp': '2025-09-30 22:23:22.782669', 'step': 9115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.813645', 'step': 9115, 'epoch': 2} {'type': 'loss', 'content': 0.11662008613348007, 'timestamp': '2025-09-30 22:23:22.837861', 'step': 9116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:22.867614', 'step': 9116, 'epoch': 2} {'type': 'loss', 'content': 0.07139552384614944, 'timestamp': '2025-09-30 22:23:22.870566', 'step': 9117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:22.900191', 'step': 9117, 'epoch': 2} {'type': 'loss', 'content': 0.07842179387807846, 'timestamp': '2025-09-30 22:23:22.906143', 'step': 9118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.936612', 'step': 9118, 'epoch': 2} {'type': 'loss', 'content': 0.1188170462846756, 'timestamp': '2025-09-30 22:23:22.939299', 'step': 9119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:22.968962', 'step': 9119, 'epoch': 2} {'type': 'loss', 'content': 0.14016848802566528, 'timestamp': '2025-09-30 22:23:23.000481', 'step': 9120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:23.030635', 'step': 9120, 'epoch': 2} {'type': 'loss', 'content': 0.07090892642736435, 'timestamp': '2025-09-30 22:23:23.033329', 'step': 9121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:23.064012', 'step': 9121, 'epoch': 2} {'type': 'loss', 'content': 0.13559173047542572, 'timestamp': '2025-09-30 22:23:23.068728', 'step': 9122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:23.099577', 'step': 9122, 'epoch': 2} {'type': 'loss', 'content': 0.07435653358697891, 'timestamp': '2025-09-30 22:23:23.102429', 'step': 9123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:23.133244', 'step': 9123, 'epoch': 2} {'type': 'loss', 'content': 0.09158920496702194, 'timestamp': '2025-09-30 22:23:23.157971', 'step': 9124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:23.187957', 'step': 9124, 'epoch': 2} {'type': 'loss', 'content': 0.08867204189300537, 'timestamp': '2025-09-30 22:23:23.190853', 'step': 9125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:23.227193', 'step': 9125, 'epoch': 2} {'type': 'loss', 'content': 0.08392084389925003, 'timestamp': '2025-09-30 22:23:23.229825', 'step': 9126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:23.260279', 'step': 9126, 'epoch': 2} {'type': 'loss', 'content': 0.15131618082523346, 'timestamp': '2025-09-30 22:23:23.262895', 'step': 9127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:23.293812', 'step': 9127, 'epoch': 2} {'type': 'loss', 'content': 0.14515119791030884, 'timestamp': '2025-09-30 22:23:23.318542', 'step': 9128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:23.358608', 'step': 9128, 'epoch': 2} {'type': 'loss', 'content': 0.03791707754135132, 'timestamp': '2025-09-30 22:23:23.364565', 'step': 9129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:23.398866', 'step': 9129, 'epoch': 2} {'type': 'loss', 'content': 0.11183220148086548, 'timestamp': '2025-09-30 22:23:23.403113', 'step': 9130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:23.434952', 'step': 9130, 'epoch': 2} {'type': 'loss', 'content': 0.14234261214733124, 'timestamp': '2025-09-30 22:23:23.438467', 'step': 9131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:23.469665', 'step': 9131, 'epoch': 2} {'type': 'loss', 'content': 0.11366152763366699, 'timestamp': '2025-09-30 22:23:23.505278', 'step': 9132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:23.545081', 'step': 9132, 'epoch': 2} {'type': 'loss', 'content': 0.07430016994476318, 'timestamp': '2025-09-30 22:23:23.556773', 'step': 9133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:23.596881', 'step': 9133, 'epoch': 2} {'type': 'loss', 'content': 0.10046358406543732, 'timestamp': '2025-09-30 22:23:23.606987', 'step': 9134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:23.644644', 'step': 9134, 'epoch': 2} {'type': 'loss', 'content': 0.1678992211818695, 'timestamp': '2025-09-30 22:23:23.654063', 'step': 9135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:23.690344', 'step': 9135, 'epoch': 2} {'type': 'loss', 'content': 0.11995194852352142, 'timestamp': '2025-09-30 22:23:23.720323', 'step': 9136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:23.751547', 'step': 9136, 'epoch': 2} {'type': 'loss', 'content': 0.07284069061279297, 'timestamp': '2025-09-30 22:23:23.758073', 'step': 9137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:23.792772', 'step': 9137, 'epoch': 2} {'type': 'loss', 'content': 0.11820925027132034, 'timestamp': '2025-09-30 22:23:23.800114', 'step': 9138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:23:23.835264', 'step': 9138, 'epoch': 2} {'type': 'loss', 'content': 0.09576628357172012, 'timestamp': '2025-09-30 22:23:23.846292', 'step': 9139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:23.882576', 'step': 9139, 'epoch': 2} {'type': 'loss', 'content': 0.14246952533721924, 'timestamp': '2025-09-30 22:23:23.912472', 'step': 9140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:23.947869', 'step': 9140, 'epoch': 2} {'type': 'loss', 'content': 0.10112276673316956, 'timestamp': '2025-09-30 22:23:23.957350', 'step': 9141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:23.993878', 'step': 9141, 'epoch': 2} {'type': 'loss', 'content': 0.06603488326072693, 'timestamp': '2025-09-30 22:23:24.002460', 'step': 9142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:24.038605', 'step': 9142, 'epoch': 2} {'type': 'loss', 'content': 0.08413462340831757, 'timestamp': '2025-09-30 22:23:24.042341', 'step': 9143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:24.074275', 'step': 9143, 'epoch': 2} {'type': 'loss', 'content': 0.11506732553243637, 'timestamp': '2025-09-30 22:23:24.099847', 'step': 9144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:24.137547', 'step': 9144, 'epoch': 2} {'type': 'loss', 'content': 0.1913047581911087, 'timestamp': '2025-09-30 22:23:24.146942', 'step': 9145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:24.184105', 'step': 9145, 'epoch': 2} {'type': 'loss', 'content': 0.06572704017162323, 'timestamp': '2025-09-30 22:23:24.192638', 'step': 9146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:24.228519', 'step': 9146, 'epoch': 2} {'type': 'loss', 'content': 0.20721513032913208, 'timestamp': '2025-09-30 22:23:24.232028', 'step': 9147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:24.262622', 'step': 9147, 'epoch': 2} {'type': 'loss', 'content': 0.04700019210577011, 'timestamp': '2025-09-30 22:23:24.290843', 'step': 9148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:24.325163', 'step': 9148, 'epoch': 2} {'type': 'loss', 'content': 0.060737114399671555, 'timestamp': '2025-09-30 22:23:24.331311', 'step': 9149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:24.366066', 'step': 9149, 'epoch': 2} {'type': 'loss', 'content': 0.14951488375663757, 'timestamp': '2025-09-30 22:23:24.373306', 'step': 9150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:24.407847', 'step': 9150, 'epoch': 2} {'type': 'loss', 'content': 0.19937144219875336, 'timestamp': '2025-09-30 22:23:24.417000', 'step': 9151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:24.451363', 'step': 9151, 'epoch': 2} {'type': 'loss', 'content': 0.09826969355344772, 'timestamp': '2025-09-30 22:23:24.479553', 'step': 9152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:24.512056', 'step': 9152, 'epoch': 2} {'type': 'loss', 'content': 0.16495375335216522, 'timestamp': '2025-09-30 22:23:24.517368', 'step': 9153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:24.551824', 'step': 9153, 'epoch': 2} {'type': 'loss', 'content': 0.08953149616718292, 'timestamp': '2025-09-30 22:23:24.554865', 'step': 9154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:24.585632', 'step': 9154, 'epoch': 2} {'type': 'loss', 'content': 0.10410819947719574, 'timestamp': '2025-09-30 22:23:24.592001', 'step': 9155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:24.624812', 'step': 9155, 'epoch': 2} {'type': 'loss', 'content': 0.07179795950651169, 'timestamp': '2025-09-30 22:23:24.654107', 'step': 9156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:24.686772', 'step': 9156, 'epoch': 2} {'type': 'loss', 'content': 0.11281644552946091, 'timestamp': '2025-09-30 22:23:24.693979', 'step': 9157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:24.732319', 'step': 9157, 'epoch': 2} {'type': 'loss', 'content': 0.08121398091316223, 'timestamp': '2025-09-30 22:23:24.737006', 'step': 9158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:24.770100', 'step': 9158, 'epoch': 2} {'type': 'loss', 'content': 0.057058144360780716, 'timestamp': '2025-09-30 22:23:24.775560', 'step': 9159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:24.808380', 'step': 9159, 'epoch': 2} {'type': 'loss', 'content': 0.17396973073482513, 'timestamp': '2025-09-30 22:23:24.834835', 'step': 9160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:24.868406', 'step': 9160, 'epoch': 2} {'type': 'loss', 'content': 0.046710845082998276, 'timestamp': '2025-09-30 22:23:24.874131', 'step': 9161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:24.906499', 'step': 9161, 'epoch': 2} {'type': 'loss', 'content': 0.09384646266698837, 'timestamp': '2025-09-30 22:23:24.911903', 'step': 9162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:24.942033', 'step': 9162, 'epoch': 2} {'type': 'loss', 'content': 0.06627017259597778, 'timestamp': '2025-09-30 22:23:24.946702', 'step': 9163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:24.979298', 'step': 9163, 'epoch': 2} {'type': 'loss', 'content': 0.06522776931524277, 'timestamp': '2025-09-30 22:23:25.009710', 'step': 9164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:25.046357', 'step': 9164, 'epoch': 2} {'type': 'loss', 'content': 0.2196498066186905, 'timestamp': '2025-09-30 22:23:25.058363', 'step': 9165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:25.092317', 'step': 9165, 'epoch': 2} {'type': 'loss', 'content': 0.14636075496673584, 'timestamp': '2025-09-30 22:23:25.094821', 'step': 9166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:25.130517', 'step': 9166, 'epoch': 2} {'type': 'loss', 'content': 0.09385865181684494, 'timestamp': '2025-09-30 22:23:25.134900', 'step': 9167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.165411', 'step': 9167, 'epoch': 2} {'type': 'loss', 'content': 0.08718831837177277, 'timestamp': '2025-09-30 22:23:25.191189', 'step': 9168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:25.223326', 'step': 9168, 'epoch': 2} {'type': 'loss', 'content': 0.10624432563781738, 'timestamp': '2025-09-30 22:23:25.227923', 'step': 9169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.260139', 'step': 9169, 'epoch': 2} {'type': 'loss', 'content': 0.11472076922655106, 'timestamp': '2025-09-30 22:23:25.263485', 'step': 9170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:23:25.296156', 'step': 9170, 'epoch': 2} {'type': 'loss', 'content': 0.08813080191612244, 'timestamp': '2025-09-30 22:23:25.301550', 'step': 9171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:25.334075', 'step': 9171, 'epoch': 2} {'type': 'loss', 'content': 0.1280059814453125, 'timestamp': '2025-09-30 22:23:25.360734', 'step': 9172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:25.396498', 'step': 9172, 'epoch': 2} {'type': 'loss', 'content': 0.039161503314971924, 'timestamp': '2025-09-30 22:23:25.402980', 'step': 9173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:25.437284', 'step': 9173, 'epoch': 2} {'type': 'loss', 'content': 0.23326475918293, 'timestamp': '2025-09-30 22:23:25.442217', 'step': 9174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.471986', 'step': 9174, 'epoch': 2} {'type': 'loss', 'content': 0.2505831718444824, 'timestamp': '2025-09-30 22:23:25.474866', 'step': 9175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:25.505241', 'step': 9175, 'epoch': 2} {'type': 'loss', 'content': 0.06691103428602219, 'timestamp': '2025-09-30 22:23:25.530106', 'step': 9176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:25.562285', 'step': 9176, 'epoch': 2} {'type': 'loss', 'content': 0.06534069031476974, 'timestamp': '2025-09-30 22:23:25.566283', 'step': 9177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:25.597519', 'step': 9177, 'epoch': 2} {'type': 'loss', 'content': 0.11670546233654022, 'timestamp': '2025-09-30 22:23:25.602203', 'step': 9178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.635262', 'step': 9178, 'epoch': 2} {'type': 'loss', 'content': 0.2636096477508545, 'timestamp': '2025-09-30 22:23:25.639326', 'step': 9179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:25.670503', 'step': 9179, 'epoch': 2} {'type': 'loss', 'content': 0.08604735881090164, 'timestamp': '2025-09-30 22:23:25.696018', 'step': 9180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.730377', 'step': 9180, 'epoch': 2} {'type': 'loss', 'content': 0.060557495802640915, 'timestamp': '2025-09-30 22:23:25.736119', 'step': 9181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:25.770293', 'step': 9181, 'epoch': 2} {'type': 'loss', 'content': 0.09403059631586075, 'timestamp': '2025-09-30 22:23:25.774989', 'step': 9182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:25.805021', 'step': 9182, 'epoch': 2} {'type': 'loss', 'content': 0.1413591206073761, 'timestamp': '2025-09-30 22:23:25.810947', 'step': 9183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.844914', 'step': 9183, 'epoch': 2} {'type': 'loss', 'content': 0.11638329178094864, 'timestamp': '2025-09-30 22:23:25.871303', 'step': 9184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:25.905333', 'step': 9184, 'epoch': 2} {'type': 'loss', 'content': 0.07232657819986343, 'timestamp': '2025-09-30 22:23:25.911597', 'step': 9185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:25.945762', 'step': 9185, 'epoch': 2} {'type': 'loss', 'content': 0.08256153017282486, 'timestamp': '2025-09-30 22:23:25.950467', 'step': 9186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:25.982593', 'step': 9186, 'epoch': 2} {'type': 'loss', 'content': 0.11898615211248398, 'timestamp': '2025-09-30 22:23:25.990025', 'step': 9187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:26.023183', 'step': 9187, 'epoch': 2} {'type': 'loss', 'content': 0.09113822132349014, 'timestamp': '2025-09-30 22:23:26.050593', 'step': 9188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:26.081381', 'step': 9188, 'epoch': 2} {'type': 'loss', 'content': 0.17927095293998718, 'timestamp': '2025-09-30 22:23:26.085674', 'step': 9189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.121528', 'step': 9189, 'epoch': 2} {'type': 'loss', 'content': 0.08755090087652206, 'timestamp': '2025-09-30 22:23:26.125630', 'step': 9190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.162589', 'step': 9190, 'epoch': 2} {'type': 'loss', 'content': 0.04822637885808945, 'timestamp': '2025-09-30 22:23:26.169123', 'step': 9191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.203064', 'step': 9191, 'epoch': 2} {'type': 'loss', 'content': 0.1388167291879654, 'timestamp': '2025-09-30 22:23:26.234106', 'step': 9192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:26.268340', 'step': 9192, 'epoch': 2} {'type': 'loss', 'content': 0.20185045897960663, 'timestamp': '2025-09-30 22:23:26.275219', 'step': 9193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:26.308848', 'step': 9193, 'epoch': 2} {'type': 'loss', 'content': 0.14740987122058868, 'timestamp': '2025-09-30 22:23:26.316465', 'step': 9194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.360250', 'step': 9194, 'epoch': 2} {'type': 'loss', 'content': 0.1316124051809311, 'timestamp': '2025-09-30 22:23:26.367444', 'step': 9195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.402526', 'step': 9195, 'epoch': 2} {'type': 'loss', 'content': 0.1103237196803093, 'timestamp': '2025-09-30 22:23:26.431595', 'step': 9196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:26.463474', 'step': 9196, 'epoch': 2} {'type': 'loss', 'content': 0.07826530933380127, 'timestamp': '2025-09-30 22:23:26.467055', 'step': 9197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:26.497861', 'step': 9197, 'epoch': 2} {'type': 'loss', 'content': 0.11103818565607071, 'timestamp': '2025-09-30 22:23:26.504047', 'step': 9198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.535422', 'step': 9198, 'epoch': 2} {'type': 'loss', 'content': 0.1461467742919922, 'timestamp': '2025-09-30 22:23:26.545241', 'step': 9199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:26.580705', 'step': 9199, 'epoch': 2} {'type': 'loss', 'content': 0.17607581615447998, 'timestamp': '2025-09-30 22:23:26.610795', 'step': 9200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:26.642761', 'step': 9200, 'epoch': 2} {'type': 'loss', 'content': 0.066554956138134, 'timestamp': '2025-09-30 22:23:26.646728', 'step': 9201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:26.680476', 'step': 9201, 'epoch': 2} {'type': 'loss', 'content': 0.09937556087970734, 'timestamp': '2025-09-30 22:23:26.684517', 'step': 9202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:26.716821', 'step': 9202, 'epoch': 2} {'type': 'loss', 'content': 0.09535739570856094, 'timestamp': '2025-09-30 22:23:26.721146', 'step': 9203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:26.755538', 'step': 9203, 'epoch': 2} {'type': 'loss', 'content': 0.07043090462684631, 'timestamp': '2025-09-30 22:23:26.780400', 'step': 9204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:26.834515', 'step': 9204, 'epoch': 2} {'type': 'loss', 'content': 0.1654670089483261, 'timestamp': '2025-09-30 22:23:26.836882', 'step': 9205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:26.870580', 'step': 9205, 'epoch': 2} {'type': 'loss', 'content': 0.1386597603559494, 'timestamp': '2025-09-30 22:23:26.882724', 'step': 9206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:26.917412', 'step': 9206, 'epoch': 2} {'type': 'loss', 'content': 0.09624358266592026, 'timestamp': '2025-09-30 22:23:26.920023', 'step': 9207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:26.951943', 'step': 9207, 'epoch': 2} {'type': 'loss', 'content': 0.099956214427948, 'timestamp': '2025-09-30 22:23:26.976052', 'step': 9208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.006944', 'step': 9208, 'epoch': 2} {'type': 'loss', 'content': 0.08027266710996628, 'timestamp': '2025-09-30 22:23:27.009450', 'step': 9209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.039538', 'step': 9209, 'epoch': 2} {'type': 'loss', 'content': 0.12685717642307281, 'timestamp': '2025-09-30 22:23:27.043051', 'step': 9210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.074199', 'step': 9210, 'epoch': 2} {'type': 'loss', 'content': 0.14938022196292877, 'timestamp': '2025-09-30 22:23:27.079279', 'step': 9211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.116491', 'step': 9211, 'epoch': 2} {'type': 'loss', 'content': 0.13810217380523682, 'timestamp': '2025-09-30 22:23:27.143649', 'step': 9212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.179760', 'step': 9212, 'epoch': 2} {'type': 'loss', 'content': 0.13977894186973572, 'timestamp': '2025-09-30 22:23:27.189560', 'step': 9213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.225718', 'step': 9213, 'epoch': 2} {'type': 'loss', 'content': 0.1400105506181717, 'timestamp': '2025-09-30 22:23:27.230226', 'step': 9214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.262165', 'step': 9214, 'epoch': 2} {'type': 'loss', 'content': 0.07146690785884857, 'timestamp': '2025-09-30 22:23:27.266225', 'step': 9215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.297757', 'step': 9215, 'epoch': 2} {'type': 'loss', 'content': 0.06791028380393982, 'timestamp': '2025-09-30 22:23:27.322316', 'step': 9216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:27.354084', 'step': 9216, 'epoch': 2} {'type': 'loss', 'content': 0.0889093354344368, 'timestamp': '2025-09-30 22:23:27.357263', 'step': 9217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.388859', 'step': 9217, 'epoch': 2} {'type': 'loss', 'content': 0.1001615971326828, 'timestamp': '2025-09-30 22:23:27.394501', 'step': 9218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.425711', 'step': 9218, 'epoch': 2} {'type': 'loss', 'content': 0.08431190252304077, 'timestamp': '2025-09-30 22:23:27.429838', 'step': 9219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:27.461955', 'step': 9219, 'epoch': 2} {'type': 'loss', 'content': 0.10314539074897766, 'timestamp': '2025-09-30 22:23:27.486032', 'step': 9220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.522466', 'step': 9220, 'epoch': 2} {'type': 'loss', 'content': 0.09114977717399597, 'timestamp': '2025-09-30 22:23:27.526373', 'step': 9221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.556073', 'step': 9221, 'epoch': 2} {'type': 'loss', 'content': 0.10784997045993805, 'timestamp': '2025-09-30 22:23:27.561018', 'step': 9222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.592874', 'step': 9222, 'epoch': 2} {'type': 'loss', 'content': 0.1349460780620575, 'timestamp': '2025-09-30 22:23:27.596298', 'step': 9223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.630380', 'step': 9223, 'epoch': 2} {'type': 'loss', 'content': 0.16915898025035858, 'timestamp': '2025-09-30 22:23:27.655889', 'step': 9224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:27.687679', 'step': 9224, 'epoch': 2} {'type': 'loss', 'content': 0.11141230911016464, 'timestamp': '2025-09-30 22:23:27.691711', 'step': 9225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:27.724002', 'step': 9225, 'epoch': 2} {'type': 'loss', 'content': 0.09583963453769684, 'timestamp': '2025-09-30 22:23:27.733268', 'step': 9226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.769024', 'step': 9226, 'epoch': 2} {'type': 'loss', 'content': 0.09145514667034149, 'timestamp': '2025-09-30 22:23:27.773098', 'step': 9227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:27.806352', 'step': 9227, 'epoch': 2} {'type': 'loss', 'content': 0.12124592810869217, 'timestamp': '2025-09-30 22:23:27.835596', 'step': 9228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:27.869984', 'step': 9228, 'epoch': 2} {'type': 'loss', 'content': 0.09478610008955002, 'timestamp': '2025-09-30 22:23:27.876482', 'step': 9229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.910171', 'step': 9229, 'epoch': 2} {'type': 'loss', 'content': 0.11238478124141693, 'timestamp': '2025-09-30 22:23:27.915247', 'step': 9230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:27.948187', 'step': 9230, 'epoch': 2} {'type': 'loss', 'content': 0.09665457159280777, 'timestamp': '2025-09-30 22:23:27.952805', 'step': 9231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:27.984891', 'step': 9231, 'epoch': 2} {'type': 'loss', 'content': 0.06368161737918854, 'timestamp': '2025-09-30 22:23:28.009413', 'step': 9232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.040184', 'step': 9232, 'epoch': 2} {'type': 'loss', 'content': 0.12078974395990372, 'timestamp': '2025-09-30 22:23:28.043291', 'step': 9233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:28.073632', 'step': 9233, 'epoch': 2} {'type': 'loss', 'content': 0.15080896019935608, 'timestamp': '2025-09-30 22:23:28.078573', 'step': 9234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:28.109019', 'step': 9234, 'epoch': 2} {'type': 'loss', 'content': 0.16102831065654755, 'timestamp': '2025-09-30 22:23:28.112916', 'step': 9235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.144055', 'step': 9235, 'epoch': 2} {'type': 'loss', 'content': 0.1420040726661682, 'timestamp': '2025-09-30 22:23:28.168805', 'step': 9236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.201647', 'step': 9236, 'epoch': 2} {'type': 'loss', 'content': 0.15160413086414337, 'timestamp': '2025-09-30 22:23:28.206583', 'step': 9237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.240374', 'step': 9237, 'epoch': 2} {'type': 'loss', 'content': 0.12288075685501099, 'timestamp': '2025-09-30 22:23:28.242705', 'step': 9238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.272920', 'step': 9238, 'epoch': 2} {'type': 'loss', 'content': 0.148775115609169, 'timestamp': '2025-09-30 22:23:28.275936', 'step': 9239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:28.305935', 'step': 9239, 'epoch': 2} {'type': 'loss', 'content': 0.09582773596048355, 'timestamp': '2025-09-30 22:23:28.329888', 'step': 9240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.359850', 'step': 9240, 'epoch': 2} {'type': 'loss', 'content': 0.15988656878471375, 'timestamp': '2025-09-30 22:23:28.362048', 'step': 9241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.391514', 'step': 9241, 'epoch': 2} {'type': 'loss', 'content': 0.12419158965349197, 'timestamp': '2025-09-30 22:23:28.393691', 'step': 9242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.426961', 'step': 9242, 'epoch': 2} {'type': 'loss', 'content': 0.08568131178617477, 'timestamp': '2025-09-30 22:23:28.432192', 'step': 9243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.468130', 'step': 9243, 'epoch': 2} {'type': 'loss', 'content': 0.15842099487781525, 'timestamp': '2025-09-30 22:23:28.493470', 'step': 9244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:28.531464', 'step': 9244, 'epoch': 2} {'type': 'loss', 'content': 0.11597767472267151, 'timestamp': '2025-09-30 22:23:28.534044', 'step': 9245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.564349', 'step': 9245, 'epoch': 2} {'type': 'loss', 'content': 0.04566806182265282, 'timestamp': '2025-09-30 22:23:28.567014', 'step': 9246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.597287', 'step': 9246, 'epoch': 2} {'type': 'loss', 'content': 0.08891840279102325, 'timestamp': '2025-09-30 22:23:28.600130', 'step': 9247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:28.630489', 'step': 9247, 'epoch': 2} {'type': 'loss', 'content': 0.10305210947990417, 'timestamp': '2025-09-30 22:23:28.655833', 'step': 9248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.687567', 'step': 9248, 'epoch': 2} {'type': 'loss', 'content': 0.09570339322090149, 'timestamp': '2025-09-30 22:23:28.689733', 'step': 9249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:28.719698', 'step': 9249, 'epoch': 2} {'type': 'loss', 'content': 0.15174373984336853, 'timestamp': '2025-09-30 22:23:28.721866', 'step': 9250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:28.757948', 'step': 9250, 'epoch': 2} {'type': 'loss', 'content': 0.10151563584804535, 'timestamp': '2025-09-30 22:23:28.760972', 'step': 9251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.790770', 'step': 9251, 'epoch': 2} {'type': 'loss', 'content': 0.10424239933490753, 'timestamp': '2025-09-30 22:23:28.816712', 'step': 9252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.849386', 'step': 9252, 'epoch': 2} {'type': 'loss', 'content': 0.08428928256034851, 'timestamp': '2025-09-30 22:23:28.853625', 'step': 9253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.885695', 'step': 9253, 'epoch': 2} {'type': 'loss', 'content': 0.15419963002204895, 'timestamp': '2025-09-30 22:23:28.890004', 'step': 9254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:28.922875', 'step': 9254, 'epoch': 2} {'type': 'loss', 'content': 0.07103057205677032, 'timestamp': '2025-09-30 22:23:28.926597', 'step': 9255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:28.957962', 'step': 9255, 'epoch': 2} {'type': 'loss', 'content': 0.17297114431858063, 'timestamp': '2025-09-30 22:23:28.982703', 'step': 9256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.014498', 'step': 9256, 'epoch': 2} {'type': 'loss', 'content': 0.23515845835208893, 'timestamp': '2025-09-30 22:23:29.017807', 'step': 9257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:29.049521', 'step': 9257, 'epoch': 2} {'type': 'loss', 'content': 0.19256329536437988, 'timestamp': '2025-09-30 22:23:29.053650', 'step': 9258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.084375', 'step': 9258, 'epoch': 2} {'type': 'loss', 'content': 0.12982319295406342, 'timestamp': '2025-09-30 22:23:29.087752', 'step': 9259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:29.119775', 'step': 9259, 'epoch': 2} {'type': 'loss', 'content': 0.09227900207042694, 'timestamp': '2025-09-30 22:23:29.145556', 'step': 9260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.177684', 'step': 9260, 'epoch': 2} {'type': 'loss', 'content': 0.11693388968706131, 'timestamp': '2025-09-30 22:23:29.181491', 'step': 9261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:29.213709', 'step': 9261, 'epoch': 2} {'type': 'loss', 'content': 0.07773103564977646, 'timestamp': '2025-09-30 22:23:29.216384', 'step': 9262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.247938', 'step': 9262, 'epoch': 2} {'type': 'loss', 'content': 0.08437895774841309, 'timestamp': '2025-09-30 22:23:29.250647', 'step': 9263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.281450', 'step': 9263, 'epoch': 2} {'type': 'loss', 'content': 0.17111587524414062, 'timestamp': '2025-09-30 22:23:29.309745', 'step': 9264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:29.341435', 'step': 9264, 'epoch': 2} {'type': 'loss', 'content': 0.1662369966506958, 'timestamp': '2025-09-30 22:23:29.347058', 'step': 9265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:29.380248', 'step': 9265, 'epoch': 2} {'type': 'loss', 'content': 0.17882615327835083, 'timestamp': '2025-09-30 22:23:29.386406', 'step': 9266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.416740', 'step': 9266, 'epoch': 2} {'type': 'loss', 'content': 0.13672500848770142, 'timestamp': '2025-09-30 22:23:29.419973', 'step': 9267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:29.449973', 'step': 9267, 'epoch': 2} {'type': 'loss', 'content': 0.11292242258787155, 'timestamp': '2025-09-30 22:23:29.474109', 'step': 9268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.504761', 'step': 9268, 'epoch': 2} {'type': 'loss', 'content': 0.11364414542913437, 'timestamp': '2025-09-30 22:23:29.507168', 'step': 9269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:23:29.537396', 'step': 9269, 'epoch': 2} {'type': 'loss', 'content': 0.12727250158786774, 'timestamp': '2025-09-30 22:23:29.542404', 'step': 9270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:29.573369', 'step': 9270, 'epoch': 2} {'type': 'loss', 'content': 0.0719078928232193, 'timestamp': '2025-09-30 22:23:29.577051', 'step': 9271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.619425', 'step': 9271, 'epoch': 2} {'type': 'loss', 'content': 0.12658776342868805, 'timestamp': '2025-09-30 22:23:29.644750', 'step': 9272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:29.678366', 'step': 9272, 'epoch': 2} {'type': 'loss', 'content': 0.08571401983499527, 'timestamp': '2025-09-30 22:23:29.681628', 'step': 9273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:23:29.712795', 'step': 9273, 'epoch': 2} {'type': 'loss', 'content': 0.07904356718063354, 'timestamp': '2025-09-30 22:23:29.719576', 'step': 9274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.751047', 'step': 9274, 'epoch': 2} {'type': 'loss', 'content': 0.07929816842079163, 'timestamp': '2025-09-30 22:23:29.754647', 'step': 9275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.787831', 'step': 9275, 'epoch': 2} {'type': 'loss', 'content': 0.13311898708343506, 'timestamp': '2025-09-30 22:23:29.812948', 'step': 9276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.842995', 'step': 9276, 'epoch': 2} {'type': 'loss', 'content': 0.11500407755374908, 'timestamp': '2025-09-30 22:23:29.845600', 'step': 9277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.875474', 'step': 9277, 'epoch': 2} {'type': 'loss', 'content': 0.0639142319560051, 'timestamp': '2025-09-30 22:23:29.878940', 'step': 9278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:29.910764', 'step': 9278, 'epoch': 2} {'type': 'loss', 'content': 0.1451025903224945, 'timestamp': '2025-09-30 22:23:29.915144', 'step': 9279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:29.948456', 'step': 9279, 'epoch': 2} {'type': 'loss', 'content': 0.14592771232128143, 'timestamp': '2025-09-30 22:23:29.975029', 'step': 9280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.005878', 'step': 9280, 'epoch': 2} {'type': 'loss', 'content': 0.07239808887243271, 'timestamp': '2025-09-30 22:23:30.018612', 'step': 9281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.051982', 'step': 9281, 'epoch': 2} {'type': 'loss', 'content': 0.12593324482440948, 'timestamp': '2025-09-30 22:23:30.056008', 'step': 9282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:30.089340', 'step': 9282, 'epoch': 2} {'type': 'loss', 'content': 0.09712280333042145, 'timestamp': '2025-09-30 22:23:30.092539', 'step': 9283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:30.130304', 'step': 9283, 'epoch': 2} {'type': 'loss', 'content': 0.0939740538597107, 'timestamp': '2025-09-30 22:23:30.155954', 'step': 9284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:30.187931', 'step': 9284, 'epoch': 2} {'type': 'loss', 'content': 0.18294736742973328, 'timestamp': '2025-09-30 22:23:30.193589', 'step': 9285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.227783', 'step': 9285, 'epoch': 2} {'type': 'loss', 'content': 0.11800394207239151, 'timestamp': '2025-09-30 22:23:30.231988', 'step': 9286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:30.265368', 'step': 9286, 'epoch': 2} {'type': 'loss', 'content': 0.1477755755186081, 'timestamp': '2025-09-30 22:23:30.269136', 'step': 9287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:30.301184', 'step': 9287, 'epoch': 2} {'type': 'loss', 'content': 0.07089585065841675, 'timestamp': '2025-09-30 22:23:30.327333', 'step': 9288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:30.360978', 'step': 9288, 'epoch': 2} {'type': 'loss', 'content': 0.08784052729606628, 'timestamp': '2025-09-30 22:23:30.365025', 'step': 9289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:30.395894', 'step': 9289, 'epoch': 2} {'type': 'loss', 'content': 0.050029776990413666, 'timestamp': '2025-09-30 22:23:30.398855', 'step': 9290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:30.430668', 'step': 9290, 'epoch': 2} {'type': 'loss', 'content': 0.13817229866981506, 'timestamp': '2025-09-30 22:23:30.435734', 'step': 9291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.469178', 'step': 9291, 'epoch': 2} {'type': 'loss', 'content': 0.12907420098781586, 'timestamp': '2025-09-30 22:23:30.495631', 'step': 9292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:30.529727', 'step': 9292, 'epoch': 2} {'type': 'loss', 'content': 0.17790758609771729, 'timestamp': '2025-09-30 22:23:30.547384', 'step': 9293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.580344', 'step': 9293, 'epoch': 2} {'type': 'loss', 'content': 0.1539183109998703, 'timestamp': '2025-09-30 22:23:30.585248', 'step': 9294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.627935', 'step': 9294, 'epoch': 2} {'type': 'loss', 'content': 0.06767076998949051, 'timestamp': '2025-09-30 22:23:30.632461', 'step': 9295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:30.663816', 'step': 9295, 'epoch': 2} {'type': 'loss', 'content': 0.1630334109067917, 'timestamp': '2025-09-30 22:23:30.687612', 'step': 9296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.718667', 'step': 9296, 'epoch': 2} {'type': 'loss', 'content': 0.04571858048439026, 'timestamp': '2025-09-30 22:23:30.721974', 'step': 9297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:30.752106', 'step': 9297, 'epoch': 2} {'type': 'loss', 'content': 0.16994698345661163, 'timestamp': '2025-09-30 22:23:30.755764', 'step': 9298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:30.792769', 'step': 9298, 'epoch': 2} {'type': 'loss', 'content': 0.1675684005022049, 'timestamp': '2025-09-30 22:23:30.796555', 'step': 9299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:30.834223', 'step': 9299, 'epoch': 2} {'type': 'loss', 'content': 0.1977931261062622, 'timestamp': '2025-09-30 22:23:30.860355', 'step': 9300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:30.892517', 'step': 9300, 'epoch': 2} {'type': 'loss', 'content': 0.06571528315544128, 'timestamp': '2025-09-30 22:23:30.895441', 'step': 9301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:30.925241', 'step': 9301, 'epoch': 2} {'type': 'loss', 'content': 0.11131005734205246, 'timestamp': '2025-09-30 22:23:30.932117', 'step': 9302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:30.962880', 'step': 9302, 'epoch': 2} {'type': 'loss', 'content': 0.11526311933994293, 'timestamp': '2025-09-30 22:23:30.965807', 'step': 9303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.003983', 'step': 9303, 'epoch': 2} {'type': 'loss', 'content': 0.06701178848743439, 'timestamp': '2025-09-30 22:23:31.029074', 'step': 9304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:31.060170', 'step': 9304, 'epoch': 2} {'type': 'loss', 'content': 0.11930014938116074, 'timestamp': '2025-09-30 22:23:31.062944', 'step': 9305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.093727', 'step': 9305, 'epoch': 2} {'type': 'loss', 'content': 0.108942411839962, 'timestamp': '2025-09-30 22:23:31.096814', 'step': 9306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:31.128697', 'step': 9306, 'epoch': 2} {'type': 'loss', 'content': 0.09204857796430588, 'timestamp': '2025-09-30 22:23:31.132296', 'step': 9307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:31.163986', 'step': 9307, 'epoch': 2} {'type': 'loss', 'content': 0.12768618762493134, 'timestamp': '2025-09-30 22:23:31.188695', 'step': 9308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:31.221174', 'step': 9308, 'epoch': 2} {'type': 'loss', 'content': 0.11522965878248215, 'timestamp': '2025-09-30 22:23:31.225144', 'step': 9309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.255367', 'step': 9309, 'epoch': 2} {'type': 'loss', 'content': 0.09242735058069229, 'timestamp': '2025-09-30 22:23:31.259290', 'step': 9310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.294134', 'step': 9310, 'epoch': 2} {'type': 'loss', 'content': 0.06866717338562012, 'timestamp': '2025-09-30 22:23:31.297686', 'step': 9311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:31.336287', 'step': 9311, 'epoch': 2} {'type': 'loss', 'content': 0.15652815997600555, 'timestamp': '2025-09-30 22:23:31.362071', 'step': 9312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.395194', 'step': 9312, 'epoch': 2} {'type': 'loss', 'content': 0.13082432746887207, 'timestamp': '2025-09-30 22:23:31.401247', 'step': 9313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:31.433414', 'step': 9313, 'epoch': 2} {'type': 'loss', 'content': 0.08222402632236481, 'timestamp': '2025-09-30 22:23:31.436539', 'step': 9314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.468365', 'step': 9314, 'epoch': 2} {'type': 'loss', 'content': 0.09145305305719376, 'timestamp': '2025-09-30 22:23:31.471761', 'step': 9315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.502909', 'step': 9315, 'epoch': 2} {'type': 'loss', 'content': 0.08484228700399399, 'timestamp': '2025-09-30 22:23:31.528370', 'step': 9316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:31.557820', 'step': 9316, 'epoch': 2} {'type': 'loss', 'content': 0.10525675863027573, 'timestamp': '2025-09-30 22:23:31.561684', 'step': 9317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:31.591753', 'step': 9317, 'epoch': 2} {'type': 'loss', 'content': 0.05147743225097656, 'timestamp': '2025-09-30 22:23:31.595807', 'step': 9318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:31.627370', 'step': 9318, 'epoch': 2} {'type': 'loss', 'content': 0.07158256322145462, 'timestamp': '2025-09-30 22:23:31.636614', 'step': 9319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:31.668417', 'step': 9319, 'epoch': 2} {'type': 'loss', 'content': 0.1050407737493515, 'timestamp': '2025-09-30 22:23:31.693023', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:23:39.986723', 'step': 9320, 'epoch': 2} {'type': 'pplx', 'content': 11168.532551544584, 'timestamp': '2025-09-30 22:23:39.990539', 'step': 9320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:40.020292', 'step': 9320, 'epoch': 2} {'type': 'loss', 'content': 0.09527026861906052, 'timestamp': '2025-09-30 22:23:40.025670', 'step': 9321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:40.057292', 'step': 9321, 'epoch': 2} {'type': 'loss', 'content': 0.11623309552669525, 'timestamp': '2025-09-30 22:23:40.060248', 'step': 9322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:40.097286', 'step': 9322, 'epoch': 2} {'type': 'loss', 'content': 0.12425357848405838, 'timestamp': '2025-09-30 22:23:40.118106', 'step': 9323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:40.165366', 'step': 9323, 'epoch': 2} {'type': 'loss', 'content': 0.19354157149791718, 'timestamp': '2025-09-30 22:23:40.207204', 'step': 9324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.237952', 'step': 9324, 'epoch': 2} {'type': 'loss', 'content': 0.10683797299861908, 'timestamp': '2025-09-30 22:23:40.247562', 'step': 9325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:40.282778', 'step': 9325, 'epoch': 2} {'type': 'loss', 'content': 0.09638766944408417, 'timestamp': '2025-09-30 22:23:40.287989', 'step': 9326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.331835', 'step': 9326, 'epoch': 2} {'type': 'loss', 'content': 0.0704583153128624, 'timestamp': '2025-09-30 22:23:40.337242', 'step': 9327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:40.385611', 'step': 9327, 'epoch': 2} {'type': 'loss', 'content': 0.17202608287334442, 'timestamp': '2025-09-30 22:23:40.413067', 'step': 9328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.449892', 'step': 9328, 'epoch': 2} {'type': 'loss', 'content': 0.1042180061340332, 'timestamp': '2025-09-30 22:23:40.454925', 'step': 9329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.486771', 'step': 9329, 'epoch': 2} {'type': 'loss', 'content': 0.10617808997631073, 'timestamp': '2025-09-30 22:23:40.490120', 'step': 9330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.521884', 'step': 9330, 'epoch': 2} {'type': 'loss', 'content': 0.13301193714141846, 'timestamp': '2025-09-30 22:23:40.535681', 'step': 9331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.578382', 'step': 9331, 'epoch': 2} {'type': 'loss', 'content': 0.11272218078374863, 'timestamp': '2025-09-30 22:23:40.609933', 'step': 9332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:40.641619', 'step': 9332, 'epoch': 2} {'type': 'loss', 'content': 0.13239625096321106, 'timestamp': '2025-09-30 22:23:40.646164', 'step': 9333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:40.687890', 'step': 9333, 'epoch': 2} {'type': 'loss', 'content': 0.059744417667388916, 'timestamp': '2025-09-30 22:23:40.690980', 'step': 9334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:40.721186', 'step': 9334, 'epoch': 2} {'type': 'loss', 'content': 0.09223702549934387, 'timestamp': '2025-09-30 22:23:40.724346', 'step': 9335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.765197', 'step': 9335, 'epoch': 2} {'type': 'loss', 'content': 0.0740211233496666, 'timestamp': '2025-09-30 22:23:40.791494', 'step': 9336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:40.835561', 'step': 9336, 'epoch': 2} {'type': 'loss', 'content': 0.11319558322429657, 'timestamp': '2025-09-30 22:23:40.838028', 'step': 9337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.875664', 'step': 9337, 'epoch': 2} {'type': 'loss', 'content': 0.07119161635637283, 'timestamp': '2025-09-30 22:23:40.878981', 'step': 9338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.911328', 'step': 9338, 'epoch': 2} {'type': 'loss', 'content': 0.07733864337205887, 'timestamp': '2025-09-30 22:23:40.921085', 'step': 9339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:40.953882', 'step': 9339, 'epoch': 2} {'type': 'loss', 'content': 0.07813972979784012, 'timestamp': '2025-09-30 22:23:40.986974', 'step': 9340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.018653', 'step': 9340, 'epoch': 2} {'type': 'loss', 'content': 0.12070140242576599, 'timestamp': '2025-09-30 22:23:41.021231', 'step': 9341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.051893', 'step': 9341, 'epoch': 2} {'type': 'loss', 'content': 0.045101091265678406, 'timestamp': '2025-09-30 22:23:41.055430', 'step': 9342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.085905', 'step': 9342, 'epoch': 2} {'type': 'loss', 'content': 0.11821047961711884, 'timestamp': '2025-09-30 22:23:41.089150', 'step': 9343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.120187', 'step': 9343, 'epoch': 2} {'type': 'loss', 'content': 0.06897938996553421, 'timestamp': '2025-09-30 22:23:41.146286', 'step': 9344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.180105', 'step': 9344, 'epoch': 2} {'type': 'loss', 'content': 0.09064340591430664, 'timestamp': '2025-09-30 22:23:41.184228', 'step': 9345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:41.217065', 'step': 9345, 'epoch': 2} {'type': 'loss', 'content': 0.06302601099014282, 'timestamp': '2025-09-30 22:23:41.221910', 'step': 9346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:41.254236', 'step': 9346, 'epoch': 2} {'type': 'loss', 'content': 0.1489911526441574, 'timestamp': '2025-09-30 22:23:41.259590', 'step': 9347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:41.293269', 'step': 9347, 'epoch': 2} {'type': 'loss', 'content': 0.1264161318540573, 'timestamp': '2025-09-30 22:23:41.320697', 'step': 9348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:41.353608', 'step': 9348, 'epoch': 2} {'type': 'loss', 'content': 0.061863455921411514, 'timestamp': '2025-09-30 22:23:41.357351', 'step': 9349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.390724', 'step': 9349, 'epoch': 2} {'type': 'loss', 'content': 0.17609521746635437, 'timestamp': '2025-09-30 22:23:41.399649', 'step': 9350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:41.432593', 'step': 9350, 'epoch': 2} {'type': 'loss', 'content': 0.09867209196090698, 'timestamp': '2025-09-30 22:23:41.438384', 'step': 9351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.471164', 'step': 9351, 'epoch': 2} {'type': 'loss', 'content': 0.07547876238822937, 'timestamp': '2025-09-30 22:23:41.520838', 'step': 9352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.552051', 'step': 9352, 'epoch': 2} {'type': 'loss', 'content': 0.06188889220356941, 'timestamp': '2025-09-30 22:23:41.558366', 'step': 9353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:41.595558', 'step': 9353, 'epoch': 2} {'type': 'loss', 'content': 0.09442095458507538, 'timestamp': '2025-09-30 22:23:41.611263', 'step': 9354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:41.646704', 'step': 9354, 'epoch': 2} {'type': 'loss', 'content': 0.10241887718439102, 'timestamp': '2025-09-30 22:23:41.664363', 'step': 9355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:41.702468', 'step': 9355, 'epoch': 2} {'type': 'loss', 'content': 0.13964124023914337, 'timestamp': '2025-09-30 22:23:41.734331', 'step': 9356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:41.765292', 'step': 9356, 'epoch': 2} {'type': 'loss', 'content': 0.057357337325811386, 'timestamp': '2025-09-30 22:23:41.770369', 'step': 9357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:41.811088', 'step': 9357, 'epoch': 2} {'type': 'loss', 'content': 0.05752522125840187, 'timestamp': '2025-09-30 22:23:41.816066', 'step': 9358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:41.849528', 'step': 9358, 'epoch': 2} {'type': 'loss', 'content': 0.1046929582953453, 'timestamp': '2025-09-30 22:23:41.856387', 'step': 9359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:41.892157', 'step': 9359, 'epoch': 2} {'type': 'loss', 'content': 0.11445216834545135, 'timestamp': '2025-09-30 22:23:41.917255', 'step': 9360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:41.947825', 'step': 9360, 'epoch': 2} {'type': 'loss', 'content': 0.150625541806221, 'timestamp': '2025-09-30 22:23:41.951622', 'step': 9361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:41.984646', 'step': 9361, 'epoch': 2} {'type': 'loss', 'content': 0.17187979817390442, 'timestamp': '2025-09-30 22:23:41.987533', 'step': 9362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.018916', 'step': 9362, 'epoch': 2} {'type': 'loss', 'content': 0.21346110105514526, 'timestamp': '2025-09-30 22:23:42.022815', 'step': 9363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:42.056781', 'step': 9363, 'epoch': 2} {'type': 'loss', 'content': 0.2199687361717224, 'timestamp': '2025-09-30 22:23:42.081313', 'step': 9364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.112270', 'step': 9364, 'epoch': 2} {'type': 'loss', 'content': 0.08805877715349197, 'timestamp': '2025-09-30 22:23:42.117640', 'step': 9365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.150429', 'step': 9365, 'epoch': 2} {'type': 'loss', 'content': 0.17732709646224976, 'timestamp': '2025-09-30 22:23:42.154890', 'step': 9366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.199669', 'step': 9366, 'epoch': 2} {'type': 'loss', 'content': 0.15074369311332703, 'timestamp': '2025-09-30 22:23:42.204162', 'step': 9367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.235670', 'step': 9367, 'epoch': 2} {'type': 'loss', 'content': 0.1381312757730484, 'timestamp': '2025-09-30 22:23:42.259996', 'step': 9368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.293327', 'step': 9368, 'epoch': 2} {'type': 'loss', 'content': 0.16358613967895508, 'timestamp': '2025-09-30 22:23:42.295706', 'step': 9369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:42.327730', 'step': 9369, 'epoch': 2} {'type': 'loss', 'content': 0.134124293923378, 'timestamp': '2025-09-30 22:23:42.332959', 'step': 9370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.365027', 'step': 9370, 'epoch': 2} {'type': 'loss', 'content': 0.09940339624881744, 'timestamp': '2025-09-30 22:23:42.368357', 'step': 9371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.409605', 'step': 9371, 'epoch': 2} {'type': 'loss', 'content': 0.1126023605465889, 'timestamp': '2025-09-30 22:23:42.439563', 'step': 9372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.471416', 'step': 9372, 'epoch': 2} {'type': 'loss', 'content': 0.07859038561582565, 'timestamp': '2025-09-30 22:23:42.475991', 'step': 9373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.507213', 'step': 9373, 'epoch': 2} {'type': 'loss', 'content': 0.11977245658636093, 'timestamp': '2025-09-30 22:23:42.509800', 'step': 9374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:42.541655', 'step': 9374, 'epoch': 2} {'type': 'loss', 'content': 0.15991581976413727, 'timestamp': '2025-09-30 22:23:42.548157', 'step': 9375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:42.580617', 'step': 9375, 'epoch': 2} {'type': 'loss', 'content': 0.12386436015367508, 'timestamp': '2025-09-30 22:23:42.611580', 'step': 9376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.642521', 'step': 9376, 'epoch': 2} {'type': 'loss', 'content': 0.1191108375787735, 'timestamp': '2025-09-30 22:23:42.646114', 'step': 9377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:42.679148', 'step': 9377, 'epoch': 2} {'type': 'loss', 'content': 0.15469805896282196, 'timestamp': '2025-09-30 22:23:42.684901', 'step': 9378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.718589', 'step': 9378, 'epoch': 2} {'type': 'loss', 'content': 0.1092413067817688, 'timestamp': '2025-09-30 22:23:42.722631', 'step': 9379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.754478', 'step': 9379, 'epoch': 2} {'type': 'loss', 'content': 0.07683976739645004, 'timestamp': '2025-09-30 22:23:42.778044', 'step': 9380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.821018', 'step': 9380, 'epoch': 2} {'type': 'loss', 'content': 0.15467357635498047, 'timestamp': '2025-09-30 22:23:42.826345', 'step': 9381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.860504', 'step': 9381, 'epoch': 2} {'type': 'loss', 'content': 0.08542321622371674, 'timestamp': '2025-09-30 22:23:42.864048', 'step': 9382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:42.895581', 'step': 9382, 'epoch': 2} {'type': 'loss', 'content': 0.07297760993242264, 'timestamp': '2025-09-30 22:23:42.898116', 'step': 9383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:42.929493', 'step': 9383, 'epoch': 2} {'type': 'loss', 'content': 0.17112869024276733, 'timestamp': '2025-09-30 22:23:42.955488', 'step': 9384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:42.989320', 'step': 9384, 'epoch': 2} {'type': 'loss', 'content': 0.1756872832775116, 'timestamp': '2025-09-30 22:23:42.992588', 'step': 9385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:43.024450', 'step': 9385, 'epoch': 2} {'type': 'loss', 'content': 0.19812040030956268, 'timestamp': '2025-09-30 22:23:43.027386', 'step': 9386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:43.059088', 'step': 9386, 'epoch': 2} {'type': 'loss', 'content': 0.051468465477228165, 'timestamp': '2025-09-30 22:23:43.063062', 'step': 9387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.102260', 'step': 9387, 'epoch': 2} {'type': 'loss', 'content': 0.16598045825958252, 'timestamp': '2025-09-30 22:23:43.126697', 'step': 9388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.170125', 'step': 9388, 'epoch': 2} {'type': 'loss', 'content': 0.13459551334381104, 'timestamp': '2025-09-30 22:23:43.172613', 'step': 9389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.204468', 'step': 9389, 'epoch': 2} {'type': 'loss', 'content': 0.12097527086734772, 'timestamp': '2025-09-30 22:23:43.208897', 'step': 9390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:43.241992', 'step': 9390, 'epoch': 2} {'type': 'loss', 'content': 0.15844856202602386, 'timestamp': '2025-09-30 22:23:43.245409', 'step': 9391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.276920', 'step': 9391, 'epoch': 2} {'type': 'loss', 'content': 0.1270395666360855, 'timestamp': '2025-09-30 22:23:43.301474', 'step': 9392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.333326', 'step': 9392, 'epoch': 2} {'type': 'loss', 'content': 0.16720949113368988, 'timestamp': '2025-09-30 22:23:43.337808', 'step': 9393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.369486', 'step': 9393, 'epoch': 2} {'type': 'loss', 'content': 0.10564617067575455, 'timestamp': '2025-09-30 22:23:43.371772', 'step': 9394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:43.403220', 'step': 9394, 'epoch': 2} {'type': 'loss', 'content': 0.10326606780290604, 'timestamp': '2025-09-30 22:23:43.405455', 'step': 9395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.448102', 'step': 9395, 'epoch': 2} {'type': 'loss', 'content': 0.1653236746788025, 'timestamp': '2025-09-30 22:23:43.472171', 'step': 9396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:43.504647', 'step': 9396, 'epoch': 2} {'type': 'loss', 'content': 0.07689012587070465, 'timestamp': '2025-09-30 22:23:43.509701', 'step': 9397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.545778', 'step': 9397, 'epoch': 2} {'type': 'loss', 'content': 0.16432957351207733, 'timestamp': '2025-09-30 22:23:43.549339', 'step': 9398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.583201', 'step': 9398, 'epoch': 2} {'type': 'loss', 'content': 0.16296958923339844, 'timestamp': '2025-09-30 22:23:43.586890', 'step': 9399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.619675', 'step': 9399, 'epoch': 2} {'type': 'loss', 'content': 0.15142391622066498, 'timestamp': '2025-09-30 22:23:43.645107', 'step': 9400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.684354', 'step': 9400, 'epoch': 2} {'type': 'loss', 'content': 0.18782378733158112, 'timestamp': '2025-09-30 22:23:43.690373', 'step': 9401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:43.722740', 'step': 9401, 'epoch': 2} {'type': 'loss', 'content': 0.14504580199718475, 'timestamp': '2025-09-30 22:23:43.733171', 'step': 9402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.766900', 'step': 9402, 'epoch': 2} {'type': 'loss', 'content': 0.11843325942754745, 'timestamp': '2025-09-30 22:23:43.769284', 'step': 9403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.802003', 'step': 9403, 'epoch': 2} {'type': 'loss', 'content': 0.19439946115016937, 'timestamp': '2025-09-30 22:23:43.829104', 'step': 9404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.862229', 'step': 9404, 'epoch': 2} {'type': 'loss', 'content': 0.20222607254981995, 'timestamp': '2025-09-30 22:23:43.865686', 'step': 9405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.897472', 'step': 9405, 'epoch': 2} {'type': 'loss', 'content': 0.10532669723033905, 'timestamp': '2025-09-30 22:23:43.910224', 'step': 9406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:43.944778', 'step': 9406, 'epoch': 2} {'type': 'loss', 'content': 0.14187175035476685, 'timestamp': '2025-09-30 22:23:43.947181', 'step': 9407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:43.979765', 'step': 9407, 'epoch': 2} {'type': 'loss', 'content': 0.12296267598867416, 'timestamp': '2025-09-30 22:23:44.004010', 'step': 9408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.035986', 'step': 9408, 'epoch': 2} {'type': 'loss', 'content': 0.24507389962673187, 'timestamp': '2025-09-30 22:23:44.039459', 'step': 9409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.078469', 'step': 9409, 'epoch': 2} {'type': 'loss', 'content': 0.1133040189743042, 'timestamp': '2025-09-30 22:23:44.083575', 'step': 9410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:44.120789', 'step': 9410, 'epoch': 2} {'type': 'loss', 'content': 0.23045852780342102, 'timestamp': '2025-09-30 22:23:44.125098', 'step': 9411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.157556', 'step': 9411, 'epoch': 2} {'type': 'loss', 'content': 0.08280850946903229, 'timestamp': '2025-09-30 22:23:44.181832', 'step': 9412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:44.215326', 'step': 9412, 'epoch': 2} {'type': 'loss', 'content': 0.12496915459632874, 'timestamp': '2025-09-30 22:23:44.218526', 'step': 9413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.259836', 'step': 9413, 'epoch': 2} {'type': 'loss', 'content': 0.10342057794332504, 'timestamp': '2025-09-30 22:23:44.265965', 'step': 9414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:44.301723', 'step': 9414, 'epoch': 2} {'type': 'loss', 'content': 0.10210147500038147, 'timestamp': '2025-09-30 22:23:44.305490', 'step': 9415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:44.341667', 'step': 9415, 'epoch': 2} {'type': 'loss', 'content': 0.10385546088218689, 'timestamp': '2025-09-30 22:23:44.369498', 'step': 9416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.401526', 'step': 9416, 'epoch': 2} {'type': 'loss', 'content': 0.1351272463798523, 'timestamp': '2025-09-30 22:23:44.406253', 'step': 9417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.454091', 'step': 9417, 'epoch': 2} {'type': 'loss', 'content': 0.08422286808490753, 'timestamp': '2025-09-30 22:23:44.458543', 'step': 9418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:44.493349', 'step': 9418, 'epoch': 2} {'type': 'loss', 'content': 0.1097441166639328, 'timestamp': '2025-09-30 22:23:44.496644', 'step': 9419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:44.530592', 'step': 9419, 'epoch': 2} {'type': 'loss', 'content': 0.10788663476705551, 'timestamp': '2025-09-30 22:23:44.556254', 'step': 9420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.591563', 'step': 9420, 'epoch': 2} {'type': 'loss', 'content': 0.09474959224462509, 'timestamp': '2025-09-30 22:23:44.595075', 'step': 9421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:44.627048', 'step': 9421, 'epoch': 2} {'type': 'loss', 'content': 0.10161007940769196, 'timestamp': '2025-09-30 22:23:44.635679', 'step': 9422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:44.673557', 'step': 9422, 'epoch': 2} {'type': 'loss', 'content': 0.08221859484910965, 'timestamp': '2025-09-30 22:23:44.679662', 'step': 9423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:44.716104', 'step': 9423, 'epoch': 2} {'type': 'loss', 'content': 0.0831507220864296, 'timestamp': '2025-09-30 22:23:44.740783', 'step': 9424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:44.775004', 'step': 9424, 'epoch': 2} {'type': 'loss', 'content': 0.07213934510946274, 'timestamp': '2025-09-30 22:23:44.778702', 'step': 9425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:44.814718', 'step': 9425, 'epoch': 2} {'type': 'loss', 'content': 0.10204078257083893, 'timestamp': '2025-09-30 22:23:44.818862', 'step': 9426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:44.853246', 'step': 9426, 'epoch': 2} {'type': 'loss', 'content': 0.24192506074905396, 'timestamp': '2025-09-30 22:23:44.862639', 'step': 9427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:44.898237', 'step': 9427, 'epoch': 2} {'type': 'loss', 'content': 0.12215646356344223, 'timestamp': '2025-09-30 22:23:44.926638', 'step': 9428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:44.959867', 'step': 9428, 'epoch': 2} {'type': 'loss', 'content': 0.07950825989246368, 'timestamp': '2025-09-30 22:23:44.964490', 'step': 9429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:45.003629', 'step': 9429, 'epoch': 2} {'type': 'loss', 'content': 0.11425391584634781, 'timestamp': '2025-09-30 22:23:45.013039', 'step': 9430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:45.044980', 'step': 9430, 'epoch': 2} {'type': 'loss', 'content': 0.14193390309810638, 'timestamp': '2025-09-30 22:23:45.050419', 'step': 9431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.083606', 'step': 9431, 'epoch': 2} {'type': 'loss', 'content': 0.08776339888572693, 'timestamp': '2025-09-30 22:23:45.112750', 'step': 9432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.155719', 'step': 9432, 'epoch': 2} {'type': 'loss', 'content': 0.10349056124687195, 'timestamp': '2025-09-30 22:23:45.172122', 'step': 9433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:45.209567', 'step': 9433, 'epoch': 2} {'type': 'loss', 'content': 0.11020056903362274, 'timestamp': '2025-09-30 22:23:45.216536', 'step': 9434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:45.249366', 'step': 9434, 'epoch': 2} {'type': 'loss', 'content': 0.07949307560920715, 'timestamp': '2025-09-30 22:23:45.258343', 'step': 9435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:45.318789', 'step': 9435, 'epoch': 2} {'type': 'loss', 'content': 0.110284723341465, 'timestamp': '2025-09-30 22:23:45.343828', 'step': 9436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:45.382333', 'step': 9436, 'epoch': 2} {'type': 'loss', 'content': 0.10133388638496399, 'timestamp': '2025-09-30 22:23:45.389021', 'step': 9437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.423874', 'step': 9437, 'epoch': 2} {'type': 'loss', 'content': 0.11988047510385513, 'timestamp': '2025-09-30 22:23:45.426831', 'step': 9438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:45.461105', 'step': 9438, 'epoch': 2} {'type': 'loss', 'content': 0.1745927333831787, 'timestamp': '2025-09-30 22:23:45.463680', 'step': 9439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.494719', 'step': 9439, 'epoch': 2} {'type': 'loss', 'content': 0.11301770061254501, 'timestamp': '2025-09-30 22:23:45.520446', 'step': 9440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.550522', 'step': 9440, 'epoch': 2} {'type': 'loss', 'content': 0.11059919744729996, 'timestamp': '2025-09-30 22:23:45.555198', 'step': 9441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:45.596587', 'step': 9441, 'epoch': 2} {'type': 'loss', 'content': 0.18724794685840607, 'timestamp': '2025-09-30 22:23:45.599470', 'step': 9442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:45.630315', 'step': 9442, 'epoch': 2} {'type': 'loss', 'content': 0.11424008756875992, 'timestamp': '2025-09-30 22:23:45.633239', 'step': 9443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:45.664935', 'step': 9443, 'epoch': 2} {'type': 'loss', 'content': 0.12859421968460083, 'timestamp': '2025-09-30 22:23:45.691801', 'step': 9444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.723276', 'step': 9444, 'epoch': 2} {'type': 'loss', 'content': 0.06777209788560867, 'timestamp': '2025-09-30 22:23:45.725959', 'step': 9445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:45.756697', 'step': 9445, 'epoch': 2} {'type': 'loss', 'content': 0.07766740024089813, 'timestamp': '2025-09-30 22:23:45.759264', 'step': 9446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.790092', 'step': 9446, 'epoch': 2} {'type': 'loss', 'content': 0.11245997250080109, 'timestamp': '2025-09-30 22:23:45.804858', 'step': 9447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:45.840269', 'step': 9447, 'epoch': 2} {'type': 'loss', 'content': 0.10447460412979126, 'timestamp': '2025-09-30 22:23:45.870649', 'step': 9448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:45.901181', 'step': 9448, 'epoch': 2} {'type': 'loss', 'content': 0.18974041938781738, 'timestamp': '2025-09-30 22:23:45.905466', 'step': 9449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:45.936722', 'step': 9449, 'epoch': 2} {'type': 'loss', 'content': 0.06339818984270096, 'timestamp': '2025-09-30 22:23:45.940772', 'step': 9450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:45.973450', 'step': 9450, 'epoch': 2} {'type': 'loss', 'content': 0.11245305091142654, 'timestamp': '2025-09-30 22:23:45.976296', 'step': 9451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:46.008619', 'step': 9451, 'epoch': 2} {'type': 'loss', 'content': 0.08245635032653809, 'timestamp': '2025-09-30 22:23:46.032476', 'step': 9452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:46.063174', 'step': 9452, 'epoch': 2} {'type': 'loss', 'content': 0.08176806569099426, 'timestamp': '2025-09-30 22:23:46.066901', 'step': 9453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:46.097713', 'step': 9453, 'epoch': 2} {'type': 'loss', 'content': 0.144395112991333, 'timestamp': '2025-09-30 22:23:46.100931', 'step': 9454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:46.132070', 'step': 9454, 'epoch': 2} {'type': 'loss', 'content': 0.16540881991386414, 'timestamp': '2025-09-30 22:23:46.134225', 'step': 9455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:46.165923', 'step': 9455, 'epoch': 2} {'type': 'loss', 'content': 0.12165358662605286, 'timestamp': '2025-09-30 22:23:46.203748', 'step': 9456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:46.239039', 'step': 9456, 'epoch': 2} {'type': 'loss', 'content': 0.18498344719409943, 'timestamp': '2025-09-30 22:23:46.243564', 'step': 9457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:46.275038', 'step': 9457, 'epoch': 2} {'type': 'loss', 'content': 0.09637955576181412, 'timestamp': '2025-09-30 22:23:46.278428', 'step': 9458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.312579', 'step': 9458, 'epoch': 2} {'type': 'loss', 'content': 0.07907655835151672, 'timestamp': '2025-09-30 22:23:46.315687', 'step': 9459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:46.346841', 'step': 9459, 'epoch': 2} {'type': 'loss', 'content': 0.09206326305866241, 'timestamp': '2025-09-30 22:23:46.371168', 'step': 9460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.401805', 'step': 9460, 'epoch': 2} {'type': 'loss', 'content': 0.0544464997947216, 'timestamp': '2025-09-30 22:23:46.405265', 'step': 9461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:46.436391', 'step': 9461, 'epoch': 2} {'type': 'loss', 'content': 0.1464933156967163, 'timestamp': '2025-09-30 22:23:46.439781', 'step': 9462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.471193', 'step': 9462, 'epoch': 2} {'type': 'loss', 'content': 0.13546766340732574, 'timestamp': '2025-09-30 22:23:46.474578', 'step': 9463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:46.505384', 'step': 9463, 'epoch': 2} {'type': 'loss', 'content': 0.1589074432849884, 'timestamp': '2025-09-30 22:23:46.534622', 'step': 9464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:46.566163', 'step': 9464, 'epoch': 2} {'type': 'loss', 'content': 0.05976376682519913, 'timestamp': '2025-09-30 22:23:46.569288', 'step': 9465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.600136', 'step': 9465, 'epoch': 2} {'type': 'loss', 'content': 0.0841706395149231, 'timestamp': '2025-09-30 22:23:46.603726', 'step': 9466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:46.637201', 'step': 9466, 'epoch': 2} {'type': 'loss', 'content': 0.18804596364498138, 'timestamp': '2025-09-30 22:23:46.640679', 'step': 9467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:46.673745', 'step': 9467, 'epoch': 2} {'type': 'loss', 'content': 0.1724575012922287, 'timestamp': '2025-09-30 22:23:46.698650', 'step': 9468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.731759', 'step': 9468, 'epoch': 2} {'type': 'loss', 'content': 0.08393348008394241, 'timestamp': '2025-09-30 22:23:46.741617', 'step': 9469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.772450', 'step': 9469, 'epoch': 2} {'type': 'loss', 'content': 0.14297732710838318, 'timestamp': '2025-09-30 22:23:46.775499', 'step': 9470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:46.806875', 'step': 9470, 'epoch': 2} {'type': 'loss', 'content': 0.06467284262180328, 'timestamp': '2025-09-30 22:23:46.810369', 'step': 9471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.844863', 'step': 9471, 'epoch': 2} {'type': 'loss', 'content': 0.11029595136642456, 'timestamp': '2025-09-30 22:23:46.869187', 'step': 9472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:46.901001', 'step': 9472, 'epoch': 2} {'type': 'loss', 'content': 0.1916404664516449, 'timestamp': '2025-09-30 22:23:46.906708', 'step': 9473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:46.938529', 'step': 9473, 'epoch': 2} {'type': 'loss', 'content': 0.11840641498565674, 'timestamp': '2025-09-30 22:23:46.947012', 'step': 9474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:46.979149', 'step': 9474, 'epoch': 2} {'type': 'loss', 'content': 0.12455114722251892, 'timestamp': '2025-09-30 22:23:46.983357', 'step': 9475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:47.025255', 'step': 9475, 'epoch': 2} {'type': 'loss', 'content': 0.04833361878991127, 'timestamp': '2025-09-30 22:23:47.057623', 'step': 9476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:47.087928', 'step': 9476, 'epoch': 2} {'type': 'loss', 'content': 0.08045448362827301, 'timestamp': '2025-09-30 22:23:47.091114', 'step': 9477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:47.127820', 'step': 9477, 'epoch': 2} {'type': 'loss', 'content': 0.1562536656856537, 'timestamp': '2025-09-30 22:23:47.131287', 'step': 9478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.162131', 'step': 9478, 'epoch': 2} {'type': 'loss', 'content': 0.09584157168865204, 'timestamp': '2025-09-30 22:23:47.165981', 'step': 9479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.200580', 'step': 9479, 'epoch': 2} {'type': 'loss', 'content': 0.19934731721878052, 'timestamp': '2025-09-30 22:23:47.225313', 'step': 9480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.255891', 'step': 9480, 'epoch': 2} {'type': 'loss', 'content': 0.09076867997646332, 'timestamp': '2025-09-30 22:23:47.259860', 'step': 9481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.291079', 'step': 9481, 'epoch': 2} {'type': 'loss', 'content': 0.1446053683757782, 'timestamp': '2025-09-30 22:23:47.296595', 'step': 9482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.328885', 'step': 9482, 'epoch': 2} {'type': 'loss', 'content': 0.161752849817276, 'timestamp': '2025-09-30 22:23:47.331239', 'step': 9483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:47.361941', 'step': 9483, 'epoch': 2} {'type': 'loss', 'content': 0.11454623192548752, 'timestamp': '2025-09-30 22:23:47.386947', 'step': 9484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:47.418182', 'step': 9484, 'epoch': 2} {'type': 'loss', 'content': 0.1569739133119583, 'timestamp': '2025-09-30 22:23:47.425305', 'step': 9485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:47.456817', 'step': 9485, 'epoch': 2} {'type': 'loss', 'content': 0.09030992537736893, 'timestamp': '2025-09-30 22:23:47.472459', 'step': 9486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.502991', 'step': 9486, 'epoch': 2} {'type': 'loss', 'content': 0.10777696222066879, 'timestamp': '2025-09-30 22:23:47.506987', 'step': 9487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.544461', 'step': 9487, 'epoch': 2} {'type': 'loss', 'content': 0.11735698580741882, 'timestamp': '2025-09-30 22:23:47.575751', 'step': 9488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.606715', 'step': 9488, 'epoch': 2} {'type': 'loss', 'content': 0.13349425792694092, 'timestamp': '2025-09-30 22:23:47.610662', 'step': 9489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.641354', 'step': 9489, 'epoch': 2} {'type': 'loss', 'content': 0.10536894202232361, 'timestamp': '2025-09-30 22:23:47.645632', 'step': 9490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:47.676882', 'step': 9490, 'epoch': 2} {'type': 'loss', 'content': 0.0579473115503788, 'timestamp': '2025-09-30 22:23:47.679422', 'step': 9491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:47.716286', 'step': 9491, 'epoch': 2} {'type': 'loss', 'content': 0.08895904570817947, 'timestamp': '2025-09-30 22:23:47.744103', 'step': 9492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.775271', 'step': 9492, 'epoch': 2} {'type': 'loss', 'content': 0.23744483292102814, 'timestamp': '2025-09-30 22:23:47.778207', 'step': 9493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:47.809030', 'step': 9493, 'epoch': 2} {'type': 'loss', 'content': 0.11175946891307831, 'timestamp': '2025-09-30 22:23:47.813512', 'step': 9494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:47.867878', 'step': 9494, 'epoch': 2} {'type': 'loss', 'content': 0.05854736268520355, 'timestamp': '2025-09-30 22:23:47.873115', 'step': 9495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:47.907018', 'step': 9495, 'epoch': 2} {'type': 'loss', 'content': 0.14080484211444855, 'timestamp': '2025-09-30 22:23:47.931019', 'step': 9496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:47.963027', 'step': 9496, 'epoch': 2} {'type': 'loss', 'content': 0.09272409975528717, 'timestamp': '2025-09-30 22:23:47.968381', 'step': 9497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:48.003852', 'step': 9497, 'epoch': 2} {'type': 'loss', 'content': 0.1669013649225235, 'timestamp': '2025-09-30 22:23:48.008659', 'step': 9498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:48.038910', 'step': 9498, 'epoch': 2} {'type': 'loss', 'content': 0.19067415595054626, 'timestamp': '2025-09-30 22:23:48.042061', 'step': 9499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:48.072649', 'step': 9499, 'epoch': 2} {'type': 'loss', 'content': 0.09894917160272598, 'timestamp': '2025-09-30 22:23:48.096696', 'step': 9500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 9500', 'timestamp': '2025-09-30 22:23:52.487683', 'step': 9500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:52.530879', 'step': 9500, 'epoch': 2} {'type': 'loss', 'content': 0.18002933263778687, 'timestamp': '2025-09-30 22:23:52.537780', 'step': 9501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:52.572978', 'step': 9501, 'epoch': 2} {'type': 'loss', 'content': 0.08309103548526764, 'timestamp': '2025-09-30 22:23:52.575796', 'step': 9502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:52.610421', 'step': 9502, 'epoch': 2} {'type': 'loss', 'content': 0.11433303356170654, 'timestamp': '2025-09-30 22:23:52.615426', 'step': 9503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:52.650684', 'step': 9503, 'epoch': 2} {'type': 'loss', 'content': 0.06731189787387848, 'timestamp': '2025-09-30 22:23:52.679124', 'step': 9504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:52.710510', 'step': 9504, 'epoch': 2} {'type': 'loss', 'content': 0.12239154428243637, 'timestamp': '2025-09-30 22:23:52.713171', 'step': 9505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:52.745917', 'step': 9505, 'epoch': 2} {'type': 'loss', 'content': 0.02796126715838909, 'timestamp': '2025-09-30 22:23:52.753888', 'step': 9506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:52.784918', 'step': 9506, 'epoch': 2} {'type': 'loss', 'content': 0.13688044250011444, 'timestamp': '2025-09-30 22:23:52.788246', 'step': 9507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:52.819988', 'step': 9507, 'epoch': 2} {'type': 'loss', 'content': 0.16190901398658752, 'timestamp': '2025-09-30 22:23:52.844258', 'step': 9508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:52.874408', 'step': 9508, 'epoch': 2} {'type': 'loss', 'content': 0.07011434435844421, 'timestamp': '2025-09-30 22:23:52.877468', 'step': 9509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:52.912620', 'step': 9509, 'epoch': 2} {'type': 'loss', 'content': 0.12385699898004532, 'timestamp': '2025-09-30 22:23:52.918111', 'step': 9510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:52.952501', 'step': 9510, 'epoch': 2} {'type': 'loss', 'content': 0.1341220736503601, 'timestamp': '2025-09-30 22:23:52.954929', 'step': 9511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:52.986271', 'step': 9511, 'epoch': 2} {'type': 'loss', 'content': 0.11451718956232071, 'timestamp': '2025-09-30 22:23:53.011380', 'step': 9512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:53.041980', 'step': 9512, 'epoch': 2} {'type': 'loss', 'content': 0.2415749877691269, 'timestamp': '2025-09-30 22:23:53.046483', 'step': 9513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:53.087699', 'step': 9513, 'epoch': 2} {'type': 'loss', 'content': 0.09637214988470078, 'timestamp': '2025-09-30 22:23:53.090182', 'step': 9514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:53.120071', 'step': 9514, 'epoch': 2} {'type': 'loss', 'content': 0.07947637885808945, 'timestamp': '2025-09-30 22:23:53.122647', 'step': 9515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:53.154266', 'step': 9515, 'epoch': 2} {'type': 'loss', 'content': 0.0670674741268158, 'timestamp': '2025-09-30 22:23:53.179078', 'step': 9516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:53.210032', 'step': 9516, 'epoch': 2} {'type': 'loss', 'content': 0.09702646732330322, 'timestamp': '2025-09-30 22:23:53.213525', 'step': 9517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:53.245393', 'step': 9517, 'epoch': 2} {'type': 'loss', 'content': 0.17137889564037323, 'timestamp': '2025-09-30 22:23:53.248455', 'step': 9518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:53.281271', 'step': 9518, 'epoch': 2} {'type': 'loss', 'content': 0.09599485993385315, 'timestamp': '2025-09-30 22:23:53.285289', 'step': 9519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:53.317731', 'step': 9519, 'epoch': 2} {'type': 'loss', 'content': 0.11211271584033966, 'timestamp': '2025-09-30 22:23:53.342749', 'step': 9520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:53.377355', 'step': 9520, 'epoch': 2} {'type': 'loss', 'content': 0.08716260641813278, 'timestamp': '2025-09-30 22:23:53.380564', 'step': 9521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:53.411328', 'step': 9521, 'epoch': 2} {'type': 'loss', 'content': 0.08982384949922562, 'timestamp': '2025-09-30 22:23:53.414245', 'step': 9522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:53.444186', 'step': 9522, 'epoch': 2} {'type': 'loss', 'content': 0.1246880516409874, 'timestamp': '2025-09-30 22:23:53.447697', 'step': 9523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:53.478630', 'step': 9523, 'epoch': 2} {'type': 'loss', 'content': 0.18183813989162445, 'timestamp': '2025-09-30 22:23:53.502831', 'step': 9524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:53.534403', 'step': 9524, 'epoch': 2} {'type': 'loss', 'content': 0.1297331154346466, 'timestamp': '2025-09-30 22:23:53.537299', 'step': 9525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:53.585074', 'step': 9525, 'epoch': 2} {'type': 'loss', 'content': 0.10882602632045746, 'timestamp': '2025-09-30 22:23:53.587485', 'step': 9526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:53.629452', 'step': 9526, 'epoch': 2} {'type': 'loss', 'content': 0.14804351329803467, 'timestamp': '2025-09-30 22:23:53.631902', 'step': 9527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:53.665382', 'step': 9527, 'epoch': 2} {'type': 'loss', 'content': 0.13389426469802856, 'timestamp': '2025-09-30 22:23:53.690579', 'step': 9528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:53.721018', 'step': 9528, 'epoch': 2} {'type': 'loss', 'content': 0.1239299401640892, 'timestamp': '2025-09-30 22:23:53.723780', 'step': 9529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:53.755476', 'step': 9529, 'epoch': 2} {'type': 'loss', 'content': 0.09728720039129257, 'timestamp': '2025-09-30 22:23:53.758648', 'step': 9530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:53.790039', 'step': 9530, 'epoch': 2} {'type': 'loss', 'content': 0.17449542880058289, 'timestamp': '2025-09-30 22:23:53.794238', 'step': 9531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:53.832157', 'step': 9531, 'epoch': 2} {'type': 'loss', 'content': 0.1476626843214035, 'timestamp': '2025-09-30 22:23:53.856880', 'step': 9532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:53.887621', 'step': 9532, 'epoch': 2} {'type': 'loss', 'content': 0.1107405498623848, 'timestamp': '2025-09-30 22:23:53.893972', 'step': 9533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:53.933983', 'step': 9533, 'epoch': 2} {'type': 'loss', 'content': 0.08519928902387619, 'timestamp': '2025-09-30 22:23:53.946152', 'step': 9534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:53.977008', 'step': 9534, 'epoch': 2} {'type': 'loss', 'content': 0.08247872442007065, 'timestamp': '2025-09-30 22:23:53.980655', 'step': 9535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:54.011765', 'step': 9535, 'epoch': 2} {'type': 'loss', 'content': 0.08221191167831421, 'timestamp': '2025-09-30 22:23:54.036960', 'step': 9536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.069530', 'step': 9536, 'epoch': 2} {'type': 'loss', 'content': 0.19622226059436798, 'timestamp': '2025-09-30 22:23:54.075306', 'step': 9537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.107180', 'step': 9537, 'epoch': 2} {'type': 'loss', 'content': 0.15191279351711273, 'timestamp': '2025-09-30 22:23:54.121956', 'step': 9538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:54.153881', 'step': 9538, 'epoch': 2} {'type': 'loss', 'content': 0.10095934569835663, 'timestamp': '2025-09-30 22:23:54.157536', 'step': 9539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.187528', 'step': 9539, 'epoch': 2} {'type': 'loss', 'content': 0.12365991622209549, 'timestamp': '2025-09-30 22:23:54.212110', 'step': 9540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.244313', 'step': 9540, 'epoch': 2} {'type': 'loss', 'content': 0.10651898384094238, 'timestamp': '2025-09-30 22:23:54.248459', 'step': 9541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.280106', 'step': 9541, 'epoch': 2} {'type': 'loss', 'content': 0.0988381877541542, 'timestamp': '2025-09-30 22:23:54.283817', 'step': 9542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.321629', 'step': 9542, 'epoch': 2} {'type': 'loss', 'content': 0.17002719640731812, 'timestamp': '2025-09-30 22:23:54.324385', 'step': 9543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.355008', 'step': 9543, 'epoch': 2} {'type': 'loss', 'content': 0.10287229716777802, 'timestamp': '2025-09-30 22:23:54.381748', 'step': 9544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:54.420263', 'step': 9544, 'epoch': 2} {'type': 'loss', 'content': 0.06662723422050476, 'timestamp': '2025-09-30 22:23:54.424045', 'step': 9545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.467231', 'step': 9545, 'epoch': 2} {'type': 'loss', 'content': 0.0625540167093277, 'timestamp': '2025-09-30 22:23:54.471478', 'step': 9546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.505097', 'step': 9546, 'epoch': 2} {'type': 'loss', 'content': 0.1325714886188507, 'timestamp': '2025-09-30 22:23:54.508613', 'step': 9547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.539613', 'step': 9547, 'epoch': 2} {'type': 'loss', 'content': 0.07805681228637695, 'timestamp': '2025-09-30 22:23:54.566038', 'step': 9548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.597398', 'step': 9548, 'epoch': 2} {'type': 'loss', 'content': 0.0949537456035614, 'timestamp': '2025-09-30 22:23:54.601136', 'step': 9549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.633289', 'step': 9549, 'epoch': 2} {'type': 'loss', 'content': 0.03868519142270088, 'timestamp': '2025-09-30 22:23:54.636818', 'step': 9550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.668192', 'step': 9550, 'epoch': 2} {'type': 'loss', 'content': 0.14096707105636597, 'timestamp': '2025-09-30 22:23:54.671635', 'step': 9551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.703417', 'step': 9551, 'epoch': 2} {'type': 'loss', 'content': 0.16014093160629272, 'timestamp': '2025-09-30 22:23:54.727695', 'step': 9552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:54.759360', 'step': 9552, 'epoch': 2} {'type': 'loss', 'content': 0.1426078826189041, 'timestamp': '2025-09-30 22:23:54.761862', 'step': 9553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.792446', 'step': 9553, 'epoch': 2} {'type': 'loss', 'content': 0.1897578239440918, 'timestamp': '2025-09-30 22:23:54.795790', 'step': 9554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.827555', 'step': 9554, 'epoch': 2} {'type': 'loss', 'content': 0.0930851474404335, 'timestamp': '2025-09-30 22:23:54.830666', 'step': 9555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:54.862321', 'step': 9555, 'epoch': 2} {'type': 'loss', 'content': 0.09934347867965698, 'timestamp': '2025-09-30 22:23:54.886472', 'step': 9556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.916810', 'step': 9556, 'epoch': 2} {'type': 'loss', 'content': 0.15219619870185852, 'timestamp': '2025-09-30 22:23:54.919602', 'step': 9557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:54.950416', 'step': 9557, 'epoch': 2} {'type': 'loss', 'content': 0.20870313048362732, 'timestamp': '2025-09-30 22:23:54.952996', 'step': 9558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:54.989223', 'step': 9558, 'epoch': 2} {'type': 'loss', 'content': 0.1083943098783493, 'timestamp': '2025-09-30 22:23:54.992305', 'step': 9559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:55.023963', 'step': 9559, 'epoch': 2} {'type': 'loss', 'content': 0.13348223268985748, 'timestamp': '2025-09-30 22:23:55.050241', 'step': 9560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:55.083338', 'step': 9560, 'epoch': 2} {'type': 'loss', 'content': 0.13863539695739746, 'timestamp': '2025-09-30 22:23:55.087360', 'step': 9561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:55.117336', 'step': 9561, 'epoch': 2} {'type': 'loss', 'content': 0.14662261307239532, 'timestamp': '2025-09-30 22:23:55.120853', 'step': 9562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:55.152945', 'step': 9562, 'epoch': 2} {'type': 'loss', 'content': 0.12476418912410736, 'timestamp': '2025-09-30 22:23:55.155752', 'step': 9563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:55.186582', 'step': 9563, 'epoch': 2} {'type': 'loss', 'content': 0.12968428432941437, 'timestamp': '2025-09-30 22:23:55.210546', 'step': 9564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.240734', 'step': 9564, 'epoch': 2} {'type': 'loss', 'content': 0.13579997420310974, 'timestamp': '2025-09-30 22:23:55.249348', 'step': 9565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.282299', 'step': 9565, 'epoch': 2} {'type': 'loss', 'content': 0.1743159145116806, 'timestamp': '2025-09-30 22:23:55.285348', 'step': 9566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:55.315788', 'step': 9566, 'epoch': 2} {'type': 'loss', 'content': 0.11565949767827988, 'timestamp': '2025-09-30 22:23:55.319330', 'step': 9567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:55.350940', 'step': 9567, 'epoch': 2} {'type': 'loss', 'content': 0.11915788799524307, 'timestamp': '2025-09-30 22:23:55.377781', 'step': 9568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:55.414728', 'step': 9568, 'epoch': 2} {'type': 'loss', 'content': 0.09198236465454102, 'timestamp': '2025-09-30 22:23:55.421038', 'step': 9569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.452329', 'step': 9569, 'epoch': 2} {'type': 'loss', 'content': 0.07420258969068527, 'timestamp': '2025-09-30 22:23:55.458182', 'step': 9570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:55.488933', 'step': 9570, 'epoch': 2} {'type': 'loss', 'content': 0.16240154206752777, 'timestamp': '2025-09-30 22:23:55.492260', 'step': 9571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:55.525774', 'step': 9571, 'epoch': 2} {'type': 'loss', 'content': 0.12540410459041595, 'timestamp': '2025-09-30 22:23:55.551368', 'step': 9572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.583951', 'step': 9572, 'epoch': 2} {'type': 'loss', 'content': 0.1457957923412323, 'timestamp': '2025-09-30 22:23:55.586932', 'step': 9573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.617689', 'step': 9573, 'epoch': 2} {'type': 'loss', 'content': 0.12530139088630676, 'timestamp': '2025-09-30 22:23:55.621943', 'step': 9574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:55.653074', 'step': 9574, 'epoch': 2} {'type': 'loss', 'content': 0.19606676697731018, 'timestamp': '2025-09-30 22:23:55.659814', 'step': 9575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:55.699461', 'step': 9575, 'epoch': 2} {'type': 'loss', 'content': 0.14124180376529694, 'timestamp': '2025-09-30 22:23:55.725412', 'step': 9576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:55.756686', 'step': 9576, 'epoch': 2} {'type': 'loss', 'content': 0.13076575100421906, 'timestamp': '2025-09-30 22:23:55.763040', 'step': 9577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:55.795318', 'step': 9577, 'epoch': 2} {'type': 'loss', 'content': 0.09838392585515976, 'timestamp': '2025-09-30 22:23:55.798410', 'step': 9578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.836910', 'step': 9578, 'epoch': 2} {'type': 'loss', 'content': 0.06864691525697708, 'timestamp': '2025-09-30 22:23:55.839446', 'step': 9579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.870467', 'step': 9579, 'epoch': 2} {'type': 'loss', 'content': 0.09926767647266388, 'timestamp': '2025-09-30 22:23:55.894178', 'step': 9580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.924701', 'step': 9580, 'epoch': 2} {'type': 'loss', 'content': 0.08979654312133789, 'timestamp': '2025-09-30 22:23:55.928592', 'step': 9581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:55.959524', 'step': 9581, 'epoch': 2} {'type': 'loss', 'content': 0.12014669179916382, 'timestamp': '2025-09-30 22:23:55.963894', 'step': 9582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:55.995330', 'step': 9582, 'epoch': 2} {'type': 'loss', 'content': 0.09993802011013031, 'timestamp': '2025-09-30 22:23:56.000594', 'step': 9583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:56.031650', 'step': 9583, 'epoch': 2} {'type': 'loss', 'content': 0.08642053604125977, 'timestamp': '2025-09-30 22:23:56.056988', 'step': 9584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:56.090070', 'step': 9584, 'epoch': 2} {'type': 'loss', 'content': 0.10670508444309235, 'timestamp': '2025-09-30 22:23:56.092961', 'step': 9585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.125687', 'step': 9585, 'epoch': 2} {'type': 'loss', 'content': 0.11484093964099884, 'timestamp': '2025-09-30 22:23:56.136118', 'step': 9586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.167316', 'step': 9586, 'epoch': 2} {'type': 'loss', 'content': 0.08215565234422684, 'timestamp': '2025-09-30 22:23:56.170511', 'step': 9587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:56.201364', 'step': 9587, 'epoch': 2} {'type': 'loss', 'content': 0.207597017288208, 'timestamp': '2025-09-30 22:23:56.226552', 'step': 9588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:56.257554', 'step': 9588, 'epoch': 2} {'type': 'loss', 'content': 0.11281079053878784, 'timestamp': '2025-09-30 22:23:56.267535', 'step': 9589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:56.300539', 'step': 9589, 'epoch': 2} {'type': 'loss', 'content': 0.07479384541511536, 'timestamp': '2025-09-30 22:23:56.303595', 'step': 9590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.333438', 'step': 9590, 'epoch': 2} {'type': 'loss', 'content': 0.09147141128778458, 'timestamp': '2025-09-30 22:23:56.336806', 'step': 9591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.370820', 'step': 9591, 'epoch': 2} {'type': 'loss', 'content': 0.19825908541679382, 'timestamp': '2025-09-30 22:23:56.395593', 'step': 9592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:56.427891', 'step': 9592, 'epoch': 2} {'type': 'loss', 'content': 0.09428278356790543, 'timestamp': '2025-09-30 22:23:56.431055', 'step': 9593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.461672', 'step': 9593, 'epoch': 2} {'type': 'loss', 'content': 0.06042972207069397, 'timestamp': '2025-09-30 22:23:56.466308', 'step': 9594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:56.497806', 'step': 9594, 'epoch': 2} {'type': 'loss', 'content': 0.24334152042865753, 'timestamp': '2025-09-30 22:23:56.501959', 'step': 9595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:56.534340', 'step': 9595, 'epoch': 2} {'type': 'loss', 'content': 0.0461520180106163, 'timestamp': '2025-09-30 22:23:56.559365', 'step': 9596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.595785', 'step': 9596, 'epoch': 2} {'type': 'loss', 'content': 0.12352089583873749, 'timestamp': '2025-09-30 22:23:56.599436', 'step': 9597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:56.630608', 'step': 9597, 'epoch': 2} {'type': 'loss', 'content': 0.19163207709789276, 'timestamp': '2025-09-30 22:23:56.635067', 'step': 9598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:56.665483', 'step': 9598, 'epoch': 2} {'type': 'loss', 'content': 0.07107851654291153, 'timestamp': '2025-09-30 22:23:56.669085', 'step': 9599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.706276', 'step': 9599, 'epoch': 2} {'type': 'loss', 'content': 0.0735129565000534, 'timestamp': '2025-09-30 22:23:56.731959', 'step': 9600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:56.764310', 'step': 9600, 'epoch': 2} {'type': 'loss', 'content': 0.15296940505504608, 'timestamp': '2025-09-30 22:23:56.767232', 'step': 9601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.797775', 'step': 9601, 'epoch': 2} {'type': 'loss', 'content': 0.07807939499616623, 'timestamp': '2025-09-30 22:23:56.802775', 'step': 9602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:56.842518', 'step': 9602, 'epoch': 2} {'type': 'loss', 'content': 0.10479211807250977, 'timestamp': '2025-09-30 22:23:56.846643', 'step': 9603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:56.878883', 'step': 9603, 'epoch': 2} {'type': 'loss', 'content': 0.11960558593273163, 'timestamp': '2025-09-30 22:23:56.904869', 'step': 9604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:56.937712', 'step': 9604, 'epoch': 2} {'type': 'loss', 'content': 0.10175777226686478, 'timestamp': '2025-09-30 22:23:56.941855', 'step': 9605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:56.975049', 'step': 9605, 'epoch': 2} {'type': 'loss', 'content': 0.10278091579675674, 'timestamp': '2025-09-30 22:23:56.978849', 'step': 9606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.009405', 'step': 9606, 'epoch': 2} {'type': 'loss', 'content': 0.11452534049749374, 'timestamp': '2025-09-30 22:23:57.012304', 'step': 9607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:57.042837', 'step': 9607, 'epoch': 2} {'type': 'loss', 'content': 0.08107882738113403, 'timestamp': '2025-09-30 22:23:57.067584', 'step': 9608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:57.099745', 'step': 9608, 'epoch': 2} {'type': 'loss', 'content': 0.09760820120573044, 'timestamp': '2025-09-30 22:23:57.110271', 'step': 9609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:57.150647', 'step': 9609, 'epoch': 2} {'type': 'loss', 'content': 0.0785926803946495, 'timestamp': '2025-09-30 22:23:57.155646', 'step': 9610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:57.186697', 'step': 9610, 'epoch': 2} {'type': 'loss', 'content': 0.14431224763393402, 'timestamp': '2025-09-30 22:23:57.189939', 'step': 9611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:57.221633', 'step': 9611, 'epoch': 2} {'type': 'loss', 'content': 0.10693594813346863, 'timestamp': '2025-09-30 22:23:57.247505', 'step': 9612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:57.278264', 'step': 9612, 'epoch': 2} {'type': 'loss', 'content': 0.06767193973064423, 'timestamp': '2025-09-30 22:23:57.281990', 'step': 9613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:57.314267', 'step': 9613, 'epoch': 2} {'type': 'loss', 'content': 0.07738925516605377, 'timestamp': '2025-09-30 22:23:57.316925', 'step': 9614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:57.356830', 'step': 9614, 'epoch': 2} {'type': 'loss', 'content': 0.12596185505390167, 'timestamp': '2025-09-30 22:23:57.361811', 'step': 9615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.393856', 'step': 9615, 'epoch': 2} {'type': 'loss', 'content': 0.1478438675403595, 'timestamp': '2025-09-30 22:23:57.418193', 'step': 9616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.450107', 'step': 9616, 'epoch': 2} {'type': 'loss', 'content': 0.11748986691236496, 'timestamp': '2025-09-30 22:23:57.453719', 'step': 9617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:57.486376', 'step': 9617, 'epoch': 2} {'type': 'loss', 'content': 0.05165290832519531, 'timestamp': '2025-09-30 22:23:57.490939', 'step': 9618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.526624', 'step': 9618, 'epoch': 2} {'type': 'loss', 'content': 0.15211306512355804, 'timestamp': '2025-09-30 22:23:57.530615', 'step': 9619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:57.561078', 'step': 9619, 'epoch': 2} {'type': 'loss', 'content': 0.11316029727458954, 'timestamp': '2025-09-30 22:23:57.587255', 'step': 9620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.619217', 'step': 9620, 'epoch': 2} {'type': 'loss', 'content': 0.097879558801651, 'timestamp': '2025-09-30 22:23:57.622920', 'step': 9621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:57.659036', 'step': 9621, 'epoch': 2} {'type': 'loss', 'content': 0.18945176899433136, 'timestamp': '2025-09-30 22:23:57.663542', 'step': 9622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:57.706620', 'step': 9622, 'epoch': 2} {'type': 'loss', 'content': 0.15249750018119812, 'timestamp': '2025-09-30 22:23:57.723498', 'step': 9623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.763499', 'step': 9623, 'epoch': 2} {'type': 'loss', 'content': 0.16639205813407898, 'timestamp': '2025-09-30 22:23:57.790114', 'step': 9624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.823293', 'step': 9624, 'epoch': 2} {'type': 'loss', 'content': 0.13170118629932404, 'timestamp': '2025-09-30 22:23:57.827053', 'step': 9625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:57.857177', 'step': 9625, 'epoch': 2} {'type': 'loss', 'content': 0.08253934234380722, 'timestamp': '2025-09-30 22:23:57.862023', 'step': 9626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:57.903411', 'step': 9626, 'epoch': 2} {'type': 'loss', 'content': 0.11353994160890579, 'timestamp': '2025-09-30 22:23:57.907042', 'step': 9627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.938526', 'step': 9627, 'epoch': 2} {'type': 'loss', 'content': 0.22558161616325378, 'timestamp': '2025-09-30 22:23:57.963567', 'step': 9628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:57.995169', 'step': 9628, 'epoch': 2} {'type': 'loss', 'content': 0.13722406327724457, 'timestamp': '2025-09-30 22:23:58.008964', 'step': 9629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:58.040356', 'step': 9629, 'epoch': 2} {'type': 'loss', 'content': 0.11783940345048904, 'timestamp': '2025-09-30 22:23:58.043059', 'step': 9630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:58.073959', 'step': 9630, 'epoch': 2} {'type': 'loss', 'content': 0.10962403565645218, 'timestamp': '2025-09-30 22:23:58.077611', 'step': 9631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:58.111984', 'step': 9631, 'epoch': 2} {'type': 'loss', 'content': 0.21026848256587982, 'timestamp': '2025-09-30 22:23:58.138031', 'step': 9632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.170155', 'step': 9632, 'epoch': 2} {'type': 'loss', 'content': 0.19173197448253632, 'timestamp': '2025-09-30 22:23:58.174516', 'step': 9633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:58.206434', 'step': 9633, 'epoch': 2} {'type': 'loss', 'content': 0.11564825475215912, 'timestamp': '2025-09-30 22:23:58.210944', 'step': 9634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:58.255539', 'step': 9634, 'epoch': 2} {'type': 'loss', 'content': 0.14689205586910248, 'timestamp': '2025-09-30 22:23:58.260861', 'step': 9635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:23:58.302866', 'step': 9635, 'epoch': 2} {'type': 'loss', 'content': 0.1189439445734024, 'timestamp': '2025-09-30 22:23:58.336823', 'step': 9636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:58.368159', 'step': 9636, 'epoch': 2} {'type': 'loss', 'content': 0.12043298780918121, 'timestamp': '2025-09-30 22:23:58.371944', 'step': 9637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.403169', 'step': 9637, 'epoch': 2} {'type': 'loss', 'content': 0.10067128390073776, 'timestamp': '2025-09-30 22:23:58.407823', 'step': 9638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.439569', 'step': 9638, 'epoch': 2} {'type': 'loss', 'content': 0.07433290779590607, 'timestamp': '2025-09-30 22:23:58.443771', 'step': 9639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:58.475181', 'step': 9639, 'epoch': 2} {'type': 'loss', 'content': 0.16084212064743042, 'timestamp': '2025-09-30 22:23:58.500885', 'step': 9640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:58.533184', 'step': 9640, 'epoch': 2} {'type': 'loss', 'content': 0.09206556528806686, 'timestamp': '2025-09-30 22:23:58.538342', 'step': 9641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.571047', 'step': 9641, 'epoch': 2} {'type': 'loss', 'content': 0.24004848301410675, 'timestamp': '2025-09-30 22:23:58.575673', 'step': 9642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.608218', 'step': 9642, 'epoch': 2} {'type': 'loss', 'content': 0.06392711400985718, 'timestamp': '2025-09-30 22:23:58.612800', 'step': 9643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.644718', 'step': 9643, 'epoch': 2} {'type': 'loss', 'content': 0.09673319011926651, 'timestamp': '2025-09-30 22:23:58.669651', 'step': 9644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.701106', 'step': 9644, 'epoch': 2} {'type': 'loss', 'content': 0.22387133538722992, 'timestamp': '2025-09-30 22:23:58.704557', 'step': 9645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:58.737949', 'step': 9645, 'epoch': 2} {'type': 'loss', 'content': 0.1361742913722992, 'timestamp': '2025-09-30 22:23:58.741271', 'step': 9646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.771186', 'step': 9646, 'epoch': 2} {'type': 'loss', 'content': 0.09574901312589645, 'timestamp': '2025-09-30 22:23:58.773883', 'step': 9647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:58.805639', 'step': 9647, 'epoch': 2} {'type': 'loss', 'content': 0.23766618967056274, 'timestamp': '2025-09-30 22:23:58.830776', 'step': 9648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:58.862813', 'step': 9648, 'epoch': 2} {'type': 'loss', 'content': 0.06956634670495987, 'timestamp': '2025-09-30 22:23:58.866601', 'step': 9649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:58.897107', 'step': 9649, 'epoch': 2} {'type': 'loss', 'content': 0.12997622787952423, 'timestamp': '2025-09-30 22:23:58.899292', 'step': 9650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:58.934804', 'step': 9650, 'epoch': 2} {'type': 'loss', 'content': 0.0752449706196785, 'timestamp': '2025-09-30 22:23:58.940006', 'step': 9651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:58.970837', 'step': 9651, 'epoch': 2} {'type': 'loss', 'content': 0.09459779411554337, 'timestamp': '2025-09-30 22:23:58.995752', 'step': 9652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.026043', 'step': 9652, 'epoch': 2} {'type': 'loss', 'content': 0.11140887439250946, 'timestamp': '2025-09-30 22:23:59.029484', 'step': 9653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.060777', 'step': 9653, 'epoch': 2} {'type': 'loss', 'content': 0.11713731288909912, 'timestamp': '2025-09-30 22:23:59.065071', 'step': 9654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.096999', 'step': 9654, 'epoch': 2} {'type': 'loss', 'content': 0.18209169805049896, 'timestamp': '2025-09-30 22:23:59.105728', 'step': 9655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:59.143785', 'step': 9655, 'epoch': 2} {'type': 'loss', 'content': 0.11752118170261383, 'timestamp': '2025-09-30 22:23:59.174467', 'step': 9656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.204452', 'step': 9656, 'epoch': 2} {'type': 'loss', 'content': 0.1130952388048172, 'timestamp': '2025-09-30 22:23:59.208956', 'step': 9657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:59.240526', 'step': 9657, 'epoch': 2} {'type': 'loss', 'content': 0.04498002678155899, 'timestamp': '2025-09-30 22:23:59.244864', 'step': 9658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.276011', 'step': 9658, 'epoch': 2} {'type': 'loss', 'content': 0.12627902626991272, 'timestamp': '2025-09-30 22:23:59.279133', 'step': 9659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:59.310703', 'step': 9659, 'epoch': 2} {'type': 'loss', 'content': 0.10658130794763565, 'timestamp': '2025-09-30 22:23:59.334666', 'step': 9660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.365156', 'step': 9660, 'epoch': 2} {'type': 'loss', 'content': 0.09604543447494507, 'timestamp': '2025-09-30 22:23:59.368243', 'step': 9661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:23:59.399324', 'step': 9661, 'epoch': 2} {'type': 'loss', 'content': 0.12607131898403168, 'timestamp': '2025-09-30 22:23:59.405116', 'step': 9662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.437770', 'step': 9662, 'epoch': 2} {'type': 'loss', 'content': 0.1029064804315567, 'timestamp': '2025-09-30 22:23:59.444371', 'step': 9663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.474664', 'step': 9663, 'epoch': 2} {'type': 'loss', 'content': 0.057598914951086044, 'timestamp': '2025-09-30 22:23:59.499340', 'step': 9664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.530845', 'step': 9664, 'epoch': 2} {'type': 'loss', 'content': 0.1029372438788414, 'timestamp': '2025-09-30 22:23:59.533873', 'step': 9665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:59.574630', 'step': 9665, 'epoch': 2} {'type': 'loss', 'content': 0.20157861709594727, 'timestamp': '2025-09-30 22:23:59.577492', 'step': 9666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:59.609059', 'step': 9666, 'epoch': 2} {'type': 'loss', 'content': 0.11878395080566406, 'timestamp': '2025-09-30 22:23:59.612123', 'step': 9667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:23:59.643579', 'step': 9667, 'epoch': 2} {'type': 'loss', 'content': 0.1368582844734192, 'timestamp': '2025-09-30 22:23:59.670886', 'step': 9668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.702505', 'step': 9668, 'epoch': 2} {'type': 'loss', 'content': 0.09139790385961533, 'timestamp': '2025-09-30 22:23:59.711461', 'step': 9669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.742683', 'step': 9669, 'epoch': 2} {'type': 'loss', 'content': 0.19846057891845703, 'timestamp': '2025-09-30 22:23:59.746243', 'step': 9670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.782147', 'step': 9670, 'epoch': 2} {'type': 'loss', 'content': 0.20277954638004303, 'timestamp': '2025-09-30 22:23:59.798351', 'step': 9671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:23:59.835388', 'step': 9671, 'epoch': 2} {'type': 'loss', 'content': 0.21086908876895905, 'timestamp': '2025-09-30 22:23:59.863928', 'step': 9672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:23:59.894828', 'step': 9672, 'epoch': 2} {'type': 'loss', 'content': 0.05514480546116829, 'timestamp': '2025-09-30 22:23:59.898929', 'step': 9673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:23:59.931708', 'step': 9673, 'epoch': 2} {'type': 'loss', 'content': 0.11837776005268097, 'timestamp': '2025-09-30 22:23:59.935351', 'step': 9674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:23:59.967061', 'step': 9674, 'epoch': 2} {'type': 'loss', 'content': 0.1127316951751709, 'timestamp': '2025-09-30 22:23:59.975642', 'step': 9675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.007334', 'step': 9675, 'epoch': 2} {'type': 'loss', 'content': 0.1806437373161316, 'timestamp': '2025-09-30 22:24:00.032433', 'step': 9676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:00.071318', 'step': 9676, 'epoch': 2} {'type': 'loss', 'content': 0.046394750475883484, 'timestamp': '2025-09-30 22:24:00.074912', 'step': 9677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.106965', 'step': 9677, 'epoch': 2} {'type': 'loss', 'content': 0.1937246024608612, 'timestamp': '2025-09-30 22:24:00.110481', 'step': 9678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:00.141620', 'step': 9678, 'epoch': 2} {'type': 'loss', 'content': 0.09134829789400101, 'timestamp': '2025-09-30 22:24:00.145435', 'step': 9679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:00.177387', 'step': 9679, 'epoch': 2} {'type': 'loss', 'content': 0.10102647542953491, 'timestamp': '2025-09-30 22:24:00.203793', 'step': 9680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:00.240696', 'step': 9680, 'epoch': 2} {'type': 'loss', 'content': 0.06324180960655212, 'timestamp': '2025-09-30 22:24:00.244203', 'step': 9681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.275161', 'step': 9681, 'epoch': 2} {'type': 'loss', 'content': 0.05144558101892471, 'timestamp': '2025-09-30 22:24:00.286235', 'step': 9682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.323285', 'step': 9682, 'epoch': 2} {'type': 'loss', 'content': 0.17408417165279388, 'timestamp': '2025-09-30 22:24:00.326176', 'step': 9683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:00.358272', 'step': 9683, 'epoch': 2} {'type': 'loss', 'content': 0.16275425255298615, 'timestamp': '2025-09-30 22:24:00.382599', 'step': 9684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:00.413542', 'step': 9684, 'epoch': 2} {'type': 'loss', 'content': 0.11672624945640564, 'timestamp': '2025-09-30 22:24:00.417277', 'step': 9685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.449267', 'step': 9685, 'epoch': 2} {'type': 'loss', 'content': 0.16234442591667175, 'timestamp': '2025-09-30 22:24:00.451480', 'step': 9686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.482679', 'step': 9686, 'epoch': 2} {'type': 'loss', 'content': 0.10410545021295547, 'timestamp': '2025-09-30 22:24:00.496823', 'step': 9687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:00.546579', 'step': 9687, 'epoch': 2} {'type': 'loss', 'content': 0.11195120215415955, 'timestamp': '2025-09-30 22:24:00.570670', 'step': 9688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:00.604991', 'step': 9688, 'epoch': 2} {'type': 'loss', 'content': 0.0979958325624466, 'timestamp': '2025-09-30 22:24:00.609936', 'step': 9689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:00.648677', 'step': 9689, 'epoch': 2} {'type': 'loss', 'content': 0.101614810526371, 'timestamp': '2025-09-30 22:24:00.653394', 'step': 9690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.687420', 'step': 9690, 'epoch': 2} {'type': 'loss', 'content': 0.09488138556480408, 'timestamp': '2025-09-30 22:24:00.690939', 'step': 9691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:00.723423', 'step': 9691, 'epoch': 2} {'type': 'loss', 'content': 0.10757385939359665, 'timestamp': '2025-09-30 22:24:00.748826', 'step': 9692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.784455', 'step': 9692, 'epoch': 2} {'type': 'loss', 'content': 0.07229668647050858, 'timestamp': '2025-09-30 22:24:00.788128', 'step': 9693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:00.819402', 'step': 9693, 'epoch': 2} {'type': 'loss', 'content': 0.14570726454257965, 'timestamp': '2025-09-30 22:24:00.823240', 'step': 9694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:00.860047', 'step': 9694, 'epoch': 2} {'type': 'loss', 'content': 0.12437298148870468, 'timestamp': '2025-09-30 22:24:00.864697', 'step': 9695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:00.896689', 'step': 9695, 'epoch': 2} {'type': 'loss', 'content': 0.10144978016614914, 'timestamp': '2025-09-30 22:24:00.920898', 'step': 9696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:00.951393', 'step': 9696, 'epoch': 2} {'type': 'loss', 'content': 0.1450217068195343, 'timestamp': '2025-09-30 22:24:00.953742', 'step': 9697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:00.985037', 'step': 9697, 'epoch': 2} {'type': 'loss', 'content': 0.12611787021160126, 'timestamp': '2025-09-30 22:24:00.988909', 'step': 9698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:01.019391', 'step': 9698, 'epoch': 2} {'type': 'loss', 'content': 0.09662230312824249, 'timestamp': '2025-09-30 22:24:01.022986', 'step': 9699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:01.053223', 'step': 9699, 'epoch': 2} {'type': 'loss', 'content': 0.05096780136227608, 'timestamp': '2025-09-30 22:24:01.081044', 'step': 9700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:01.116408', 'step': 9700, 'epoch': 2} {'type': 'loss', 'content': 0.14259681105613708, 'timestamp': '2025-09-30 22:24:01.121362', 'step': 9701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:01.155088', 'step': 9701, 'epoch': 2} {'type': 'loss', 'content': 0.14327159523963928, 'timestamp': '2025-09-30 22:24:01.158947', 'step': 9702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:01.190816', 'step': 9702, 'epoch': 2} {'type': 'loss', 'content': 0.18164169788360596, 'timestamp': '2025-09-30 22:24:01.195400', 'step': 9703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:01.226174', 'step': 9703, 'epoch': 2} {'type': 'loss', 'content': 0.054797809571027756, 'timestamp': '2025-09-30 22:24:01.251260', 'step': 9704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:01.282600', 'step': 9704, 'epoch': 2} {'type': 'loss', 'content': 0.17224648594856262, 'timestamp': '2025-09-30 22:24:01.286685', 'step': 9705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:01.317475', 'step': 9705, 'epoch': 2} {'type': 'loss', 'content': 0.12335722893476486, 'timestamp': '2025-09-30 22:24:01.323665', 'step': 9706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:01.356032', 'step': 9706, 'epoch': 2} {'type': 'loss', 'content': 0.10853108763694763, 'timestamp': '2025-09-30 22:24:01.358882', 'step': 9707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:01.393573', 'step': 9707, 'epoch': 2} {'type': 'loss', 'content': 0.09296884387731552, 'timestamp': '2025-09-30 22:24:01.417919', 'step': 9708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:01.449295', 'step': 9708, 'epoch': 2} {'type': 'loss', 'content': 0.06925377994775772, 'timestamp': '2025-09-30 22:24:01.452887', 'step': 9709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:01.483809', 'step': 9709, 'epoch': 2} {'type': 'loss', 'content': 0.1338062882423401, 'timestamp': '2025-09-30 22:24:01.487963', 'step': 9710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:01.520928', 'step': 9710, 'epoch': 2} {'type': 'loss', 'content': 0.07626918703317642, 'timestamp': '2025-09-30 22:24:01.523960', 'step': 9711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:01.560103', 'step': 9711, 'epoch': 2} {'type': 'loss', 'content': 0.11896942555904388, 'timestamp': '2025-09-30 22:24:01.585728', 'step': 9712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:01.615791', 'step': 9712, 'epoch': 2} {'type': 'loss', 'content': 0.10355313122272491, 'timestamp': '2025-09-30 22:24:01.620349', 'step': 9713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:01.651169', 'step': 9713, 'epoch': 2} {'type': 'loss', 'content': 0.11269976198673248, 'timestamp': '2025-09-30 22:24:01.655659', 'step': 9714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:01.695670', 'step': 9714, 'epoch': 2} {'type': 'loss', 'content': 0.1009543389081955, 'timestamp': '2025-09-30 22:24:01.700042', 'step': 9715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:01.731268', 'step': 9715, 'epoch': 2} {'type': 'loss', 'content': 0.06698687374591827, 'timestamp': '2025-09-30 22:24:01.755959', 'step': 9716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:01.786417', 'step': 9716, 'epoch': 2} {'type': 'loss', 'content': 0.16016267240047455, 'timestamp': '2025-09-30 22:24:01.792715', 'step': 9717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:01.827691', 'step': 9717, 'epoch': 2} {'type': 'loss', 'content': 0.049354471266269684, 'timestamp': '2025-09-30 22:24:01.832166', 'step': 9718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:01.865165', 'step': 9718, 'epoch': 2} {'type': 'loss', 'content': 0.1608630269765854, 'timestamp': '2025-09-30 22:24:01.868915', 'step': 9719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:01.900174', 'step': 9719, 'epoch': 2} {'type': 'loss', 'content': 0.062124766409397125, 'timestamp': '2025-09-30 22:24:01.926069', 'step': 9720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:01.957462', 'step': 9720, 'epoch': 2} {'type': 'loss', 'content': 0.144950270652771, 'timestamp': '2025-09-30 22:24:01.960667', 'step': 9721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:01.992879', 'step': 9721, 'epoch': 2} {'type': 'loss', 'content': 0.13774533569812775, 'timestamp': '2025-09-30 22:24:01.997399', 'step': 9722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:02.028089', 'step': 9722, 'epoch': 2} {'type': 'loss', 'content': 0.07287523150444031, 'timestamp': '2025-09-30 22:24:02.030883', 'step': 9723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.068373', 'step': 9723, 'epoch': 2} {'type': 'loss', 'content': 0.09904956817626953, 'timestamp': '2025-09-30 22:24:02.092595', 'step': 9724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:02.122805', 'step': 9724, 'epoch': 2} {'type': 'loss', 'content': 0.1016678437590599, 'timestamp': '2025-09-30 22:24:02.125901', 'step': 9725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.156675', 'step': 9725, 'epoch': 2} {'type': 'loss', 'content': 0.13817398250102997, 'timestamp': '2025-09-30 22:24:02.159684', 'step': 9726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:02.189921', 'step': 9726, 'epoch': 2} {'type': 'loss', 'content': 0.16922852396965027, 'timestamp': '2025-09-30 22:24:02.192815', 'step': 9727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.227540', 'step': 9727, 'epoch': 2} {'type': 'loss', 'content': 0.050940826535224915, 'timestamp': '2025-09-30 22:24:02.252384', 'step': 9728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.282607', 'step': 9728, 'epoch': 2} {'type': 'loss', 'content': 0.17293816804885864, 'timestamp': '2025-09-30 22:24:02.286757', 'step': 9729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.316815', 'step': 9729, 'epoch': 2} {'type': 'loss', 'content': 0.06480544060468674, 'timestamp': '2025-09-30 22:24:02.319921', 'step': 9730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.349652', 'step': 9730, 'epoch': 2} {'type': 'loss', 'content': 0.12509262561798096, 'timestamp': '2025-09-30 22:24:02.352731', 'step': 9731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.383165', 'step': 9731, 'epoch': 2} {'type': 'loss', 'content': 0.09862048923969269, 'timestamp': '2025-09-30 22:24:02.407588', 'step': 9732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:02.438794', 'step': 9732, 'epoch': 2} {'type': 'loss', 'content': 0.12125767767429352, 'timestamp': '2025-09-30 22:24:02.441769', 'step': 9733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.473312', 'step': 9733, 'epoch': 2} {'type': 'loss', 'content': 0.07793954014778137, 'timestamp': '2025-09-30 22:24:02.476757', 'step': 9734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.508606', 'step': 9734, 'epoch': 2} {'type': 'loss', 'content': 0.12695381045341492, 'timestamp': '2025-09-30 22:24:02.512085', 'step': 9735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.544081', 'step': 9735, 'epoch': 2} {'type': 'loss', 'content': 0.15408049523830414, 'timestamp': '2025-09-30 22:24:02.570849', 'step': 9736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.602703', 'step': 9736, 'epoch': 2} {'type': 'loss', 'content': 0.11827798187732697, 'timestamp': '2025-09-30 22:24:02.606199', 'step': 9737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.636769', 'step': 9737, 'epoch': 2} {'type': 'loss', 'content': 0.12465855479240417, 'timestamp': '2025-09-30 22:24:02.640890', 'step': 9738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.671591', 'step': 9738, 'epoch': 2} {'type': 'loss', 'content': 0.1650850474834442, 'timestamp': '2025-09-30 22:24:02.675425', 'step': 9739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:02.709753', 'step': 9739, 'epoch': 2} {'type': 'loss', 'content': 0.14396823942661285, 'timestamp': '2025-09-30 22:24:02.741722', 'step': 9740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.774021', 'step': 9740, 'epoch': 2} {'type': 'loss', 'content': 0.15587137639522552, 'timestamp': '2025-09-30 22:24:02.778201', 'step': 9741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.812357', 'step': 9741, 'epoch': 2} {'type': 'loss', 'content': 0.11252952367067337, 'timestamp': '2025-09-30 22:24:02.816105', 'step': 9742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.848902', 'step': 9742, 'epoch': 2} {'type': 'loss', 'content': 0.15821078419685364, 'timestamp': '2025-09-30 22:24:02.854244', 'step': 9743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:02.885937', 'step': 9743, 'epoch': 2} {'type': 'loss', 'content': 0.10961614549160004, 'timestamp': '2025-09-30 22:24:02.911385', 'step': 9744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:02.954533', 'step': 9744, 'epoch': 2} {'type': 'loss', 'content': 0.07205556333065033, 'timestamp': '2025-09-30 22:24:02.958131', 'step': 9745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:02.989517', 'step': 9745, 'epoch': 2} {'type': 'loss', 'content': 0.07282840460538864, 'timestamp': '2025-09-30 22:24:02.993771', 'step': 9746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:03.025251', 'step': 9746, 'epoch': 2} {'type': 'loss', 'content': 0.06337247788906097, 'timestamp': '2025-09-30 22:24:03.036059', 'step': 9747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.067797', 'step': 9747, 'epoch': 2} {'type': 'loss', 'content': 0.14671634137630463, 'timestamp': '2025-09-30 22:24:03.094378', 'step': 9748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:03.126403', 'step': 9748, 'epoch': 2} {'type': 'loss', 'content': 0.1629807949066162, 'timestamp': '2025-09-30 22:24:03.130515', 'step': 9749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:03.162273', 'step': 9749, 'epoch': 2} {'type': 'loss', 'content': 0.14727528393268585, 'timestamp': '2025-09-30 22:24:03.174337', 'step': 9750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.205418', 'step': 9750, 'epoch': 2} {'type': 'loss', 'content': 0.17528338730335236, 'timestamp': '2025-09-30 22:24:03.209717', 'step': 9751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.240396', 'step': 9751, 'epoch': 2} {'type': 'loss', 'content': 0.12798088788986206, 'timestamp': '2025-09-30 22:24:03.265090', 'step': 9752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.302313', 'step': 9752, 'epoch': 2} {'type': 'loss', 'content': 0.11336090415716171, 'timestamp': '2025-09-30 22:24:03.306556', 'step': 9753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.338004', 'step': 9753, 'epoch': 2} {'type': 'loss', 'content': 0.11379335820674896, 'timestamp': '2025-09-30 22:24:03.341833', 'step': 9754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:03.373271', 'step': 9754, 'epoch': 2} {'type': 'loss', 'content': 0.2087876796722412, 'timestamp': '2025-09-30 22:24:03.376524', 'step': 9755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.407704', 'step': 9755, 'epoch': 2} {'type': 'loss', 'content': 0.08094644546508789, 'timestamp': '2025-09-30 22:24:03.433363', 'step': 9756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.465874', 'step': 9756, 'epoch': 2} {'type': 'loss', 'content': 0.10625836998224258, 'timestamp': '2025-09-30 22:24:03.469064', 'step': 9757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:03.507711', 'step': 9757, 'epoch': 2} {'type': 'loss', 'content': 0.05343526601791382, 'timestamp': '2025-09-30 22:24:03.511362', 'step': 9758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.546241', 'step': 9758, 'epoch': 2} {'type': 'loss', 'content': 0.11366674304008484, 'timestamp': '2025-09-30 22:24:03.558643', 'step': 9759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:03.597692', 'step': 9759, 'epoch': 2} {'type': 'loss', 'content': 0.12677933275699615, 'timestamp': '2025-09-30 22:24:03.621942', 'step': 9760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.652287', 'step': 9760, 'epoch': 2} {'type': 'loss', 'content': 0.09661580622196198, 'timestamp': '2025-09-30 22:24:03.655572', 'step': 9761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.692568', 'step': 9761, 'epoch': 2} {'type': 'loss', 'content': 0.07164566218852997, 'timestamp': '2025-09-30 22:24:03.698380', 'step': 9762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:03.734160', 'step': 9762, 'epoch': 2} {'type': 'loss', 'content': 0.1681099832057953, 'timestamp': '2025-09-30 22:24:03.737055', 'step': 9763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.768399', 'step': 9763, 'epoch': 2} {'type': 'loss', 'content': 0.11070814728736877, 'timestamp': '2025-09-30 22:24:03.801597', 'step': 9764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:03.841167', 'step': 9764, 'epoch': 2} {'type': 'loss', 'content': 0.12331546097993851, 'timestamp': '2025-09-30 22:24:03.845424', 'step': 9765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:03.892104', 'step': 9765, 'epoch': 2} {'type': 'loss', 'content': 0.11453653872013092, 'timestamp': '2025-09-30 22:24:03.896181', 'step': 9766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.932777', 'step': 9766, 'epoch': 2} {'type': 'loss', 'content': 0.06174514815211296, 'timestamp': '2025-09-30 22:24:03.935346', 'step': 9767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:03.965991', 'step': 9767, 'epoch': 2} {'type': 'loss', 'content': 0.21858994662761688, 'timestamp': '2025-09-30 22:24:03.990384', 'step': 9768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:04.022349', 'step': 9768, 'epoch': 2} {'type': 'loss', 'content': 0.16539813578128815, 'timestamp': '2025-09-30 22:24:04.025952', 'step': 9769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:04.062477', 'step': 9769, 'epoch': 2} {'type': 'loss', 'content': 0.17438991367816925, 'timestamp': '2025-09-30 22:24:04.071064', 'step': 9770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:04.109894', 'step': 9770, 'epoch': 2} {'type': 'loss', 'content': 0.05523278936743736, 'timestamp': '2025-09-30 22:24:04.112564', 'step': 9771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:04.143726', 'step': 9771, 'epoch': 2} {'type': 'loss', 'content': 0.09121368825435638, 'timestamp': '2025-09-30 22:24:04.168318', 'step': 9772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:04.200172', 'step': 9772, 'epoch': 2} {'type': 'loss', 'content': 0.09815263003110886, 'timestamp': '2025-09-30 22:24:04.207297', 'step': 9773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:04.239707', 'step': 9773, 'epoch': 2} {'type': 'loss', 'content': 0.09517470002174377, 'timestamp': '2025-09-30 22:24:04.243677', 'step': 9774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:04.274847', 'step': 9774, 'epoch': 2} {'type': 'loss', 'content': 0.07066908478736877, 'timestamp': '2025-09-30 22:24:04.278685', 'step': 9775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:04.311544', 'step': 9775, 'epoch': 2} {'type': 'loss', 'content': 0.0693117007613182, 'timestamp': '2025-09-30 22:24:04.338503', 'step': 9776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:04.368851', 'step': 9776, 'epoch': 2} {'type': 'loss', 'content': 0.08070237934589386, 'timestamp': '2025-09-30 22:24:04.371533', 'step': 9777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:04.403048', 'step': 9777, 'epoch': 2} {'type': 'loss', 'content': 0.07553703337907791, 'timestamp': '2025-09-30 22:24:04.407277', 'step': 9778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:04.438816', 'step': 9778, 'epoch': 2} {'type': 'loss', 'content': 0.17111852765083313, 'timestamp': '2025-09-30 22:24:04.454753', 'step': 9779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:24:04.486756', 'step': 9779, 'epoch': 2} {'type': 'loss', 'content': 0.049505095928907394, 'timestamp': '2025-09-30 22:24:04.512444', 'step': 9780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:04.552097', 'step': 9780, 'epoch': 2} {'type': 'loss', 'content': 0.13491331040859222, 'timestamp': '2025-09-30 22:24:04.556461', 'step': 9781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:04.586985', 'step': 9781, 'epoch': 2} {'type': 'loss', 'content': 0.12719978392124176, 'timestamp': '2025-09-30 22:24:04.590749', 'step': 9782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:04.621637', 'step': 9782, 'epoch': 2} {'type': 'loss', 'content': 0.09864607453346252, 'timestamp': '2025-09-30 22:24:04.624804', 'step': 9783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:04.655332', 'step': 9783, 'epoch': 2} {'type': 'loss', 'content': 0.18009309470653534, 'timestamp': '2025-09-30 22:24:04.681309', 'step': 9784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:04.714331', 'step': 9784, 'epoch': 2} {'type': 'loss', 'content': 0.1752222180366516, 'timestamp': '2025-09-30 22:24:04.718925', 'step': 9785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:04.750814', 'step': 9785, 'epoch': 2} {'type': 'loss', 'content': 0.07548054307699203, 'timestamp': '2025-09-30 22:24:04.758022', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:24:13.080885', 'step': 9786, 'epoch': 2} {'type': 'pplx', 'content': 11471.415746248591, 'timestamp': '2025-09-30 22:24:13.085679', 'step': 9786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.116750', 'step': 9786, 'epoch': 2} {'type': 'loss', 'content': 0.10737933963537216, 'timestamp': '2025-09-30 22:24:13.122234', 'step': 9787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:13.167703', 'step': 9787, 'epoch': 2} {'type': 'loss', 'content': 0.05975089222192764, 'timestamp': '2025-09-30 22:24:13.192170', 'step': 9788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.231040', 'step': 9788, 'epoch': 2} {'type': 'loss', 'content': 0.11359665542840958, 'timestamp': '2025-09-30 22:24:13.234992', 'step': 9789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:13.266695', 'step': 9789, 'epoch': 2} {'type': 'loss', 'content': 0.054055873304605484, 'timestamp': '2025-09-30 22:24:13.275398', 'step': 9790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:13.306283', 'step': 9790, 'epoch': 2} {'type': 'loss', 'content': 0.0901213139295578, 'timestamp': '2025-09-30 22:24:13.311155', 'step': 9791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:13.340911', 'step': 9791, 'epoch': 2} {'type': 'loss', 'content': 0.21069557964801788, 'timestamp': '2025-09-30 22:24:13.365186', 'step': 9792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:13.395592', 'step': 9792, 'epoch': 2} {'type': 'loss', 'content': 0.0902029499411583, 'timestamp': '2025-09-30 22:24:13.409995', 'step': 9793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:13.439863', 'step': 9793, 'epoch': 2} {'type': 'loss', 'content': 0.13073517382144928, 'timestamp': '2025-09-30 22:24:13.444130', 'step': 9794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.477025', 'step': 9794, 'epoch': 2} {'type': 'loss', 'content': 0.1123570054769516, 'timestamp': '2025-09-30 22:24:13.480939', 'step': 9795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:13.519110', 'step': 9795, 'epoch': 2} {'type': 'loss', 'content': 0.11292939633131027, 'timestamp': '2025-09-30 22:24:13.543845', 'step': 9796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:13.574487', 'step': 9796, 'epoch': 2} {'type': 'loss', 'content': 0.12951844930648804, 'timestamp': '2025-09-30 22:24:13.579547', 'step': 9797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:13.611834', 'step': 9797, 'epoch': 2} {'type': 'loss', 'content': 0.0718362107872963, 'timestamp': '2025-09-30 22:24:13.614876', 'step': 9798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:13.645927', 'step': 9798, 'epoch': 2} {'type': 'loss', 'content': 0.07946223020553589, 'timestamp': '2025-09-30 22:24:13.649042', 'step': 9799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.681239', 'step': 9799, 'epoch': 2} {'type': 'loss', 'content': 0.1460408717393875, 'timestamp': '2025-09-30 22:24:13.710753', 'step': 9800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:13.741860', 'step': 9800, 'epoch': 2} {'type': 'loss', 'content': 0.11026592552661896, 'timestamp': '2025-09-30 22:24:13.744782', 'step': 9801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.787124', 'step': 9801, 'epoch': 2} {'type': 'loss', 'content': 0.10621634125709534, 'timestamp': '2025-09-30 22:24:13.798446', 'step': 9802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.828955', 'step': 9802, 'epoch': 2} {'type': 'loss', 'content': 0.1298074573278427, 'timestamp': '2025-09-30 22:24:13.840675', 'step': 9803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:13.878409', 'step': 9803, 'epoch': 2} {'type': 'loss', 'content': 0.09129005670547485, 'timestamp': '2025-09-30 22:24:13.915268', 'step': 9804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:13.952859', 'step': 9804, 'epoch': 2} {'type': 'loss', 'content': 0.13503998517990112, 'timestamp': '2025-09-30 22:24:13.955875', 'step': 9805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:13.987200', 'step': 9805, 'epoch': 2} {'type': 'loss', 'content': 0.09786174446344376, 'timestamp': '2025-09-30 22:24:13.990542', 'step': 9806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.023017', 'step': 9806, 'epoch': 2} {'type': 'loss', 'content': 0.043795015662908554, 'timestamp': '2025-09-30 22:24:14.026418', 'step': 9807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.057259', 'step': 9807, 'epoch': 2} {'type': 'loss', 'content': 0.08801233768463135, 'timestamp': '2025-09-30 22:24:14.081956', 'step': 9808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:14.113638', 'step': 9808, 'epoch': 2} {'type': 'loss', 'content': 0.16803288459777832, 'timestamp': '2025-09-30 22:24:14.118654', 'step': 9809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:14.154601', 'step': 9809, 'epoch': 2} {'type': 'loss', 'content': 0.08330448716878891, 'timestamp': '2025-09-30 22:24:14.166608', 'step': 9810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:14.200099', 'step': 9810, 'epoch': 2} {'type': 'loss', 'content': 0.17284977436065674, 'timestamp': '2025-09-30 22:24:14.212701', 'step': 9811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:14.255396', 'step': 9811, 'epoch': 2} {'type': 'loss', 'content': 0.056182861328125, 'timestamp': '2025-09-30 22:24:14.279964', 'step': 9812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:14.316428', 'step': 9812, 'epoch': 2} {'type': 'loss', 'content': 0.06371945887804031, 'timestamp': '2025-09-30 22:24:14.319758', 'step': 9813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:14.359659', 'step': 9813, 'epoch': 2} {'type': 'loss', 'content': 0.03943616524338722, 'timestamp': '2025-09-30 22:24:14.371411', 'step': 9814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.408259', 'step': 9814, 'epoch': 2} {'type': 'loss', 'content': 0.09719965606927872, 'timestamp': '2025-09-30 22:24:14.417766', 'step': 9815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.456825', 'step': 9815, 'epoch': 2} {'type': 'loss', 'content': 0.08719008415937424, 'timestamp': '2025-09-30 22:24:14.494241', 'step': 9816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.545159', 'step': 9816, 'epoch': 2} {'type': 'loss', 'content': 0.11416751891374588, 'timestamp': '2025-09-30 22:24:14.558765', 'step': 9817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:14.590620', 'step': 9817, 'epoch': 2} {'type': 'loss', 'content': 0.09445402026176453, 'timestamp': '2025-09-30 22:24:14.607633', 'step': 9818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:14.642623', 'step': 9818, 'epoch': 2} {'type': 'loss', 'content': 0.11158090084791183, 'timestamp': '2025-09-30 22:24:14.649334', 'step': 9819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.690787', 'step': 9819, 'epoch': 2} {'type': 'loss', 'content': 0.1371619701385498, 'timestamp': '2025-09-30 22:24:14.724774', 'step': 9820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:14.767919', 'step': 9820, 'epoch': 2} {'type': 'loss', 'content': 0.12202232331037521, 'timestamp': '2025-09-30 22:24:14.775236', 'step': 9821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:14.819565', 'step': 9821, 'epoch': 2} {'type': 'loss', 'content': 0.12538249790668488, 'timestamp': '2025-09-30 22:24:14.829865', 'step': 9822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:14.862656', 'step': 9822, 'epoch': 2} {'type': 'loss', 'content': 0.11662552505731583, 'timestamp': '2025-09-30 22:24:14.877551', 'step': 9823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:14.907810', 'step': 9823, 'epoch': 2} {'type': 'loss', 'content': 0.27365049719810486, 'timestamp': '2025-09-30 22:24:14.940058', 'step': 9824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:14.974420', 'step': 9824, 'epoch': 2} {'type': 'loss', 'content': 0.11486737430095673, 'timestamp': '2025-09-30 22:24:14.991099', 'step': 9825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:15.027491', 'step': 9825, 'epoch': 2} {'type': 'loss', 'content': 0.17090994119644165, 'timestamp': '2025-09-30 22:24:15.039318', 'step': 9826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:15.081927', 'step': 9826, 'epoch': 2} {'type': 'loss', 'content': 0.14444145560264587, 'timestamp': '2025-09-30 22:24:15.085300', 'step': 9827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:15.121609', 'step': 9827, 'epoch': 2} {'type': 'loss', 'content': 0.09897729754447937, 'timestamp': '2025-09-30 22:24:15.158797', 'step': 9828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.192418', 'step': 9828, 'epoch': 2} {'type': 'loss', 'content': 0.14437314867973328, 'timestamp': '2025-09-30 22:24:15.206436', 'step': 9829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:15.247268', 'step': 9829, 'epoch': 2} {'type': 'loss', 'content': 0.06667973846197128, 'timestamp': '2025-09-30 22:24:15.256910', 'step': 9830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.288937', 'step': 9830, 'epoch': 2} {'type': 'loss', 'content': 0.04327744245529175, 'timestamp': '2025-09-30 22:24:15.294633', 'step': 9831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.334368', 'step': 9831, 'epoch': 2} {'type': 'loss', 'content': 0.07380678504705429, 'timestamp': '2025-09-30 22:24:15.366780', 'step': 9832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:15.404700', 'step': 9832, 'epoch': 2} {'type': 'loss', 'content': 0.13664565980434418, 'timestamp': '2025-09-30 22:24:15.407641', 'step': 9833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:15.437122', 'step': 9833, 'epoch': 2} {'type': 'loss', 'content': 0.12398260831832886, 'timestamp': '2025-09-30 22:24:15.442462', 'step': 9834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.475386', 'step': 9834, 'epoch': 2} {'type': 'loss', 'content': 0.06550215929746628, 'timestamp': '2025-09-30 22:24:15.479900', 'step': 9835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:15.509761', 'step': 9835, 'epoch': 2} {'type': 'loss', 'content': 0.1511150449514389, 'timestamp': '2025-09-30 22:24:15.539387', 'step': 9836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:15.575430', 'step': 9836, 'epoch': 2} {'type': 'loss', 'content': 0.15928757190704346, 'timestamp': '2025-09-30 22:24:15.579093', 'step': 9837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:15.611011', 'step': 9837, 'epoch': 2} {'type': 'loss', 'content': 0.10365938395261765, 'timestamp': '2025-09-30 22:24:15.615644', 'step': 9838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.645510', 'step': 9838, 'epoch': 2} {'type': 'loss', 'content': 0.10953138768672943, 'timestamp': '2025-09-30 22:24:15.649763', 'step': 9839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:15.688381', 'step': 9839, 'epoch': 2} {'type': 'loss', 'content': 0.0733305960893631, 'timestamp': '2025-09-30 22:24:15.713589', 'step': 9840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.743372', 'step': 9840, 'epoch': 2} {'type': 'loss', 'content': 0.08738236129283905, 'timestamp': '2025-09-30 22:24:15.746684', 'step': 9841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:15.785835', 'step': 9841, 'epoch': 2} {'type': 'loss', 'content': 0.1489228457212448, 'timestamp': '2025-09-30 22:24:15.797670', 'step': 9842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:15.828508', 'step': 9842, 'epoch': 2} {'type': 'loss', 'content': 0.17224010825157166, 'timestamp': '2025-09-30 22:24:15.832719', 'step': 9843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:15.870875', 'step': 9843, 'epoch': 2} {'type': 'loss', 'content': 0.17051294445991516, 'timestamp': '2025-09-30 22:24:15.896207', 'step': 9844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:15.939572', 'step': 9844, 'epoch': 2} {'type': 'loss', 'content': 0.08841204643249512, 'timestamp': '2025-09-30 22:24:15.946477', 'step': 9845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:15.978338', 'step': 9845, 'epoch': 2} {'type': 'loss', 'content': 0.14087556302547455, 'timestamp': '2025-09-30 22:24:15.982568', 'step': 9846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.014462', 'step': 9846, 'epoch': 2} {'type': 'loss', 'content': 0.12054505199193954, 'timestamp': '2025-09-30 22:24:16.018417', 'step': 9847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:16.049792', 'step': 9847, 'epoch': 2} {'type': 'loss', 'content': 0.08588527143001556, 'timestamp': '2025-09-30 22:24:16.077948', 'step': 9848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.107643', 'step': 9848, 'epoch': 2} {'type': 'loss', 'content': 0.15219491720199585, 'timestamp': '2025-09-30 22:24:16.111042', 'step': 9849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.141840', 'step': 9849, 'epoch': 2} {'type': 'loss', 'content': 0.14930720627307892, 'timestamp': '2025-09-30 22:24:16.146822', 'step': 9850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:16.190334', 'step': 9850, 'epoch': 2} {'type': 'loss', 'content': 0.20353277027606964, 'timestamp': '2025-09-30 22:24:16.194515', 'step': 9851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.226812', 'step': 9851, 'epoch': 2} {'type': 'loss', 'content': 0.17283865809440613, 'timestamp': '2025-09-30 22:24:16.252444', 'step': 9852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.282360', 'step': 9852, 'epoch': 2} {'type': 'loss', 'content': 0.16217218339443207, 'timestamp': '2025-09-30 22:24:16.287246', 'step': 9853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.319141', 'step': 9853, 'epoch': 2} {'type': 'loss', 'content': 0.08856075257062912, 'timestamp': '2025-09-30 22:24:16.326889', 'step': 9854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.359983', 'step': 9854, 'epoch': 2} {'type': 'loss', 'content': 0.15290454030036926, 'timestamp': '2025-09-30 22:24:16.366609', 'step': 9855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.402036', 'step': 9855, 'epoch': 2} {'type': 'loss', 'content': 0.12309449911117554, 'timestamp': '2025-09-30 22:24:16.428048', 'step': 9856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.469418', 'step': 9856, 'epoch': 2} {'type': 'loss', 'content': 0.0663672611117363, 'timestamp': '2025-09-30 22:24:16.479689', 'step': 9857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:16.514121', 'step': 9857, 'epoch': 2} {'type': 'loss', 'content': 0.08920257538557053, 'timestamp': '2025-09-30 22:24:16.517071', 'step': 9858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.550498', 'step': 9858, 'epoch': 2} {'type': 'loss', 'content': 0.17138715088367462, 'timestamp': '2025-09-30 22:24:16.554321', 'step': 9859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:16.587325', 'step': 9859, 'epoch': 2} {'type': 'loss', 'content': 0.08261054754257202, 'timestamp': '2025-09-30 22:24:16.614283', 'step': 9860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.646562', 'step': 9860, 'epoch': 2} {'type': 'loss', 'content': 0.12909644842147827, 'timestamp': '2025-09-30 22:24:16.658989', 'step': 9861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.691130', 'step': 9861, 'epoch': 2} {'type': 'loss', 'content': 0.08761923760175705, 'timestamp': '2025-09-30 22:24:16.694564', 'step': 9862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.723885', 'step': 9862, 'epoch': 2} {'type': 'loss', 'content': 0.13352297246456146, 'timestamp': '2025-09-30 22:24:16.734304', 'step': 9863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.766602', 'step': 9863, 'epoch': 2} {'type': 'loss', 'content': 0.10123191773891449, 'timestamp': '2025-09-30 22:24:16.797610', 'step': 9864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:16.832759', 'step': 9864, 'epoch': 2} {'type': 'loss', 'content': 0.0679815262556076, 'timestamp': '2025-09-30 22:24:16.835466', 'step': 9865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.866346', 'step': 9865, 'epoch': 2} {'type': 'loss', 'content': 0.07807674258947372, 'timestamp': '2025-09-30 22:24:16.871101', 'step': 9866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:16.910464', 'step': 9866, 'epoch': 2} {'type': 'loss', 'content': 0.08313469588756561, 'timestamp': '2025-09-30 22:24:16.919308', 'step': 9867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:16.962005', 'step': 9867, 'epoch': 2} {'type': 'loss', 'content': 0.0651979148387909, 'timestamp': '2025-09-30 22:24:16.986088', 'step': 9868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:17.018434', 'step': 9868, 'epoch': 2} {'type': 'loss', 'content': 0.1847735345363617, 'timestamp': '2025-09-30 22:24:17.022711', 'step': 9869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:17.058870', 'step': 9869, 'epoch': 2} {'type': 'loss', 'content': 0.10273387283086777, 'timestamp': '2025-09-30 22:24:17.066433', 'step': 9870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:17.103749', 'step': 9870, 'epoch': 2} {'type': 'loss', 'content': 0.2361983358860016, 'timestamp': '2025-09-30 22:24:17.106266', 'step': 9871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:17.136362', 'step': 9871, 'epoch': 2} {'type': 'loss', 'content': 0.1789214313030243, 'timestamp': '2025-09-30 22:24:17.162428', 'step': 9872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.193100', 'step': 9872, 'epoch': 2} {'type': 'loss', 'content': 0.09654838591814041, 'timestamp': '2025-09-30 22:24:17.195793', 'step': 9873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:17.231382', 'step': 9873, 'epoch': 2} {'type': 'loss', 'content': 0.12119914591312408, 'timestamp': '2025-09-30 22:24:17.234452', 'step': 9874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:17.265749', 'step': 9874, 'epoch': 2} {'type': 'loss', 'content': 0.16619433462619781, 'timestamp': '2025-09-30 22:24:17.269041', 'step': 9875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:17.300082', 'step': 9875, 'epoch': 2} {'type': 'loss', 'content': 0.209843710064888, 'timestamp': '2025-09-30 22:24:17.335822', 'step': 9876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:17.365792', 'step': 9876, 'epoch': 2} {'type': 'loss', 'content': 0.12765595316886902, 'timestamp': '2025-09-30 22:24:17.370239', 'step': 9877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:17.401280', 'step': 9877, 'epoch': 2} {'type': 'loss', 'content': 0.20708277821540833, 'timestamp': '2025-09-30 22:24:17.405143', 'step': 9878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.436035', 'step': 9878, 'epoch': 2} {'type': 'loss', 'content': 0.10427920520305634, 'timestamp': '2025-09-30 22:24:17.439642', 'step': 9879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.471572', 'step': 9879, 'epoch': 2} {'type': 'loss', 'content': 0.09848681837320328, 'timestamp': '2025-09-30 22:24:17.495955', 'step': 9880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:17.526657', 'step': 9880, 'epoch': 2} {'type': 'loss', 'content': 0.1450134813785553, 'timestamp': '2025-09-30 22:24:17.530055', 'step': 9881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:17.561439', 'step': 9881, 'epoch': 2} {'type': 'loss', 'content': 0.10244913399219513, 'timestamp': '2025-09-30 22:24:17.564844', 'step': 9882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:17.599502', 'step': 9882, 'epoch': 2} {'type': 'loss', 'content': 0.15189170837402344, 'timestamp': '2025-09-30 22:24:17.602771', 'step': 9883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:17.633797', 'step': 9883, 'epoch': 2} {'type': 'loss', 'content': 0.07934312522411346, 'timestamp': '2025-09-30 22:24:17.658441', 'step': 9884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.699891', 'step': 9884, 'epoch': 2} {'type': 'loss', 'content': 0.15655343234539032, 'timestamp': '2025-09-30 22:24:17.704657', 'step': 9885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.736250', 'step': 9885, 'epoch': 2} {'type': 'loss', 'content': 0.13803380727767944, 'timestamp': '2025-09-30 22:24:17.743904', 'step': 9886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.782813', 'step': 9886, 'epoch': 2} {'type': 'loss', 'content': 0.07093183696269989, 'timestamp': '2025-09-30 22:24:17.786385', 'step': 9887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:17.828418', 'step': 9887, 'epoch': 2} {'type': 'loss', 'content': 0.17332853376865387, 'timestamp': '2025-09-30 22:24:17.863611', 'step': 9888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.905623', 'step': 9888, 'epoch': 2} {'type': 'loss', 'content': 0.08166313171386719, 'timestamp': '2025-09-30 22:24:17.911349', 'step': 9889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:17.945280', 'step': 9889, 'epoch': 2} {'type': 'loss', 'content': 0.08588635921478271, 'timestamp': '2025-09-30 22:24:17.949543', 'step': 9890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:17.980548', 'step': 9890, 'epoch': 2} {'type': 'loss', 'content': 0.14979048073291779, 'timestamp': '2025-09-30 22:24:17.987953', 'step': 9891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.024062', 'step': 9891, 'epoch': 2} {'type': 'loss', 'content': 0.11309999227523804, 'timestamp': '2025-09-30 22:24:18.049899', 'step': 9892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.084913', 'step': 9892, 'epoch': 2} {'type': 'loss', 'content': 0.11254903674125671, 'timestamp': '2025-09-30 22:24:18.093290', 'step': 9893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:18.125546', 'step': 9893, 'epoch': 2} {'type': 'loss', 'content': 0.09666865319013596, 'timestamp': '2025-09-30 22:24:18.128411', 'step': 9894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.159165', 'step': 9894, 'epoch': 2} {'type': 'loss', 'content': 0.2314508706331253, 'timestamp': '2025-09-30 22:24:18.161921', 'step': 9895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.192258', 'step': 9895, 'epoch': 2} {'type': 'loss', 'content': 0.08773075044155121, 'timestamp': '2025-09-30 22:24:18.216259', 'step': 9896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.247526', 'step': 9896, 'epoch': 2} {'type': 'loss', 'content': 0.0765426903963089, 'timestamp': '2025-09-30 22:24:18.256765', 'step': 9897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.289557', 'step': 9897, 'epoch': 2} {'type': 'loss', 'content': 0.13008464872837067, 'timestamp': '2025-09-30 22:24:18.298561', 'step': 9898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:18.335100', 'step': 9898, 'epoch': 2} {'type': 'loss', 'content': 0.13283789157867432, 'timestamp': '2025-09-30 22:24:18.340506', 'step': 9899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:18.376407', 'step': 9899, 'epoch': 2} {'type': 'loss', 'content': 0.14386841654777527, 'timestamp': '2025-09-30 22:24:18.401608', 'step': 9900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.433976', 'step': 9900, 'epoch': 2} {'type': 'loss', 'content': 0.1763429492712021, 'timestamp': '2025-09-30 22:24:18.437443', 'step': 9901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.469626', 'step': 9901, 'epoch': 2} {'type': 'loss', 'content': 0.13786180317401886, 'timestamp': '2025-09-30 22:24:18.473098', 'step': 9902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:18.503857', 'step': 9902, 'epoch': 2} {'type': 'loss', 'content': 0.11009039729833603, 'timestamp': '2025-09-30 22:24:18.521697', 'step': 9903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:18.560396', 'step': 9903, 'epoch': 2} {'type': 'loss', 'content': 0.14480970799922943, 'timestamp': '2025-09-30 22:24:18.585997', 'step': 9904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:18.619236', 'step': 9904, 'epoch': 2} {'type': 'loss', 'content': 0.024878226220607758, 'timestamp': '2025-09-30 22:24:18.622236', 'step': 9905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:18.652124', 'step': 9905, 'epoch': 2} {'type': 'loss', 'content': 0.139265239238739, 'timestamp': '2025-09-30 22:24:18.654751', 'step': 9906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:18.686169', 'step': 9906, 'epoch': 2} {'type': 'loss', 'content': 0.047886982560157776, 'timestamp': '2025-09-30 22:24:18.688653', 'step': 9907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.732850', 'step': 9907, 'epoch': 2} {'type': 'loss', 'content': 0.13524046540260315, 'timestamp': '2025-09-30 22:24:18.758725', 'step': 9908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:18.797125', 'step': 9908, 'epoch': 2} {'type': 'loss', 'content': 0.08028626441955566, 'timestamp': '2025-09-30 22:24:18.803021', 'step': 9909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:18.836841', 'step': 9909, 'epoch': 2} {'type': 'loss', 'content': 0.12556779384613037, 'timestamp': '2025-09-30 22:24:18.841503', 'step': 9910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:18.875932', 'step': 9910, 'epoch': 2} {'type': 'loss', 'content': 0.07015683501958847, 'timestamp': '2025-09-30 22:24:18.878909', 'step': 9911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.910361', 'step': 9911, 'epoch': 2} {'type': 'loss', 'content': 0.12002541869878769, 'timestamp': '2025-09-30 22:24:18.935212', 'step': 9912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:18.965531', 'step': 9912, 'epoch': 2} {'type': 'loss', 'content': 0.16491174697875977, 'timestamp': '2025-09-30 22:24:18.977575', 'step': 9913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.012018', 'step': 9913, 'epoch': 2} {'type': 'loss', 'content': 0.06959590315818787, 'timestamp': '2025-09-30 22:24:19.017567', 'step': 9914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.052202', 'step': 9914, 'epoch': 2} {'type': 'loss', 'content': 0.11207574605941772, 'timestamp': '2025-09-30 22:24:19.057289', 'step': 9915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:19.105082', 'step': 9915, 'epoch': 2} {'type': 'loss', 'content': 0.1236545592546463, 'timestamp': '2025-09-30 22:24:19.131766', 'step': 9916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.164233', 'step': 9916, 'epoch': 2} {'type': 'loss', 'content': 0.08629287779331207, 'timestamp': '2025-09-30 22:24:19.168469', 'step': 9917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.200427', 'step': 9917, 'epoch': 2} {'type': 'loss', 'content': 0.09375323355197906, 'timestamp': '2025-09-30 22:24:19.205986', 'step': 9918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.240901', 'step': 9918, 'epoch': 2} {'type': 'loss', 'content': 0.10466200858354568, 'timestamp': '2025-09-30 22:24:19.243779', 'step': 9919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.273553', 'step': 9919, 'epoch': 2} {'type': 'loss', 'content': 0.08108465373516083, 'timestamp': '2025-09-30 22:24:19.308161', 'step': 9920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.340562', 'step': 9920, 'epoch': 2} {'type': 'loss', 'content': 0.12530751526355743, 'timestamp': '2025-09-30 22:24:19.346449', 'step': 9921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.381128', 'step': 9921, 'epoch': 2} {'type': 'loss', 'content': 0.10782215744256973, 'timestamp': '2025-09-30 22:24:19.383465', 'step': 9922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.417634', 'step': 9922, 'epoch': 2} {'type': 'loss', 'content': 0.16526144742965698, 'timestamp': '2025-09-30 22:24:19.421570', 'step': 9923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.458886', 'step': 9923, 'epoch': 2} {'type': 'loss', 'content': 0.06845215708017349, 'timestamp': '2025-09-30 22:24:19.493209', 'step': 9924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:19.523648', 'step': 9924, 'epoch': 2} {'type': 'loss', 'content': 0.140283465385437, 'timestamp': '2025-09-30 22:24:19.527903', 'step': 9925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.559779', 'step': 9925, 'epoch': 2} {'type': 'loss', 'content': 0.07982046902179718, 'timestamp': '2025-09-30 22:24:19.563991', 'step': 9926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.611161', 'step': 9926, 'epoch': 2} {'type': 'loss', 'content': 0.05892878770828247, 'timestamp': '2025-09-30 22:24:19.628745', 'step': 9927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:19.670815', 'step': 9927, 'epoch': 2} {'type': 'loss', 'content': 0.15056723356246948, 'timestamp': '2025-09-30 22:24:19.698571', 'step': 9928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:19.730848', 'step': 9928, 'epoch': 2} {'type': 'loss', 'content': 0.07711388915777206, 'timestamp': '2025-09-30 22:24:19.736830', 'step': 9929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.770166', 'step': 9929, 'epoch': 2} {'type': 'loss', 'content': 0.27092480659484863, 'timestamp': '2025-09-30 22:24:19.773183', 'step': 9930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.805755', 'step': 9930, 'epoch': 2} {'type': 'loss', 'content': 0.09564390778541565, 'timestamp': '2025-09-30 22:24:19.810345', 'step': 9931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:19.844513', 'step': 9931, 'epoch': 2} {'type': 'loss', 'content': 0.11615488678216934, 'timestamp': '2025-09-30 22:24:19.870079', 'step': 9932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.902363', 'step': 9932, 'epoch': 2} {'type': 'loss', 'content': 0.08491957932710648, 'timestamp': '2025-09-30 22:24:19.907499', 'step': 9933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:19.939879', 'step': 9933, 'epoch': 2} {'type': 'loss', 'content': 0.1687197983264923, 'timestamp': '2025-09-30 22:24:19.945141', 'step': 9934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:19.976166', 'step': 9934, 'epoch': 2} {'type': 'loss', 'content': 0.05295437574386597, 'timestamp': '2025-09-30 22:24:19.980908', 'step': 9935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.024682', 'step': 9935, 'epoch': 2} {'type': 'loss', 'content': 0.11889819800853729, 'timestamp': '2025-09-30 22:24:20.049586', 'step': 9936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.080759', 'step': 9936, 'epoch': 2} {'type': 'loss', 'content': 0.15971910953521729, 'timestamp': '2025-09-30 22:24:20.095482', 'step': 9937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.126418', 'step': 9937, 'epoch': 2} {'type': 'loss', 'content': 0.04067232832312584, 'timestamp': '2025-09-30 22:24:20.131604', 'step': 9938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:20.163266', 'step': 9938, 'epoch': 2} {'type': 'loss', 'content': 0.09906881302595139, 'timestamp': '2025-09-30 22:24:20.169193', 'step': 9939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:20.213172', 'step': 9939, 'epoch': 2} {'type': 'loss', 'content': 0.08014915138483047, 'timestamp': '2025-09-30 22:24:20.239265', 'step': 9940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:20.283883', 'step': 9940, 'epoch': 2} {'type': 'loss', 'content': 0.17831911146640778, 'timestamp': '2025-09-30 22:24:20.299516', 'step': 9941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:20.330584', 'step': 9941, 'epoch': 2} {'type': 'loss', 'content': 0.06788085401058197, 'timestamp': '2025-09-30 22:24:20.343574', 'step': 9942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:20.386285', 'step': 9942, 'epoch': 2} {'type': 'loss', 'content': 0.15077485144138336, 'timestamp': '2025-09-30 22:24:20.390337', 'step': 9943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:20.422850', 'step': 9943, 'epoch': 2} {'type': 'loss', 'content': 0.10429923981428146, 'timestamp': '2025-09-30 22:24:20.451325', 'step': 9944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.484212', 'step': 9944, 'epoch': 2} {'type': 'loss', 'content': 0.142433300614357, 'timestamp': '2025-09-30 22:24:20.488124', 'step': 9945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:20.520480', 'step': 9945, 'epoch': 2} {'type': 'loss', 'content': 0.13330186903476715, 'timestamp': '2025-09-30 22:24:20.524975', 'step': 9946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:20.557483', 'step': 9946, 'epoch': 2} {'type': 'loss', 'content': 0.08342371135950089, 'timestamp': '2025-09-30 22:24:20.563624', 'step': 9947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:20.598697', 'step': 9947, 'epoch': 2} {'type': 'loss', 'content': 0.10848162323236465, 'timestamp': '2025-09-30 22:24:20.635421', 'step': 9948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:20.672989', 'step': 9948, 'epoch': 2} {'type': 'loss', 'content': 0.12937891483306885, 'timestamp': '2025-09-30 22:24:20.686589', 'step': 9949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.721300', 'step': 9949, 'epoch': 2} {'type': 'loss', 'content': 0.07303936779499054, 'timestamp': '2025-09-30 22:24:20.726235', 'step': 9950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.766576', 'step': 9950, 'epoch': 2} {'type': 'loss', 'content': 0.11826648563146591, 'timestamp': '2025-09-30 22:24:20.769543', 'step': 9951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:20.800524', 'step': 9951, 'epoch': 2} {'type': 'loss', 'content': 0.14350712299346924, 'timestamp': '2025-09-30 22:24:20.827182', 'step': 9952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.857668', 'step': 9952, 'epoch': 2} {'type': 'loss', 'content': 0.044414322823286057, 'timestamp': '2025-09-30 22:24:20.860562', 'step': 9953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:20.891271', 'step': 9953, 'epoch': 2} {'type': 'loss', 'content': 0.24481548368930817, 'timestamp': '2025-09-30 22:24:20.894770', 'step': 9954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:20.925950', 'step': 9954, 'epoch': 2} {'type': 'loss', 'content': 0.19392617046833038, 'timestamp': '2025-09-30 22:24:20.929747', 'step': 9955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:20.960161', 'step': 9955, 'epoch': 2} {'type': 'loss', 'content': 0.08654773980379105, 'timestamp': '2025-09-30 22:24:20.985533', 'step': 9956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.016721', 'step': 9956, 'epoch': 2} {'type': 'loss', 'content': 0.09338493645191193, 'timestamp': '2025-09-30 22:24:21.020362', 'step': 9957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:21.058846', 'step': 9957, 'epoch': 2} {'type': 'loss', 'content': 0.13086570799350739, 'timestamp': '2025-09-30 22:24:21.065836', 'step': 9958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.097446', 'step': 9958, 'epoch': 2} {'type': 'loss', 'content': 0.07199643552303314, 'timestamp': '2025-09-30 22:24:21.104364', 'step': 9959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:21.134810', 'step': 9959, 'epoch': 2} {'type': 'loss', 'content': 0.1363479197025299, 'timestamp': '2025-09-30 22:24:21.161846', 'step': 9960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.196915', 'step': 9960, 'epoch': 2} {'type': 'loss', 'content': 0.14591294527053833, 'timestamp': '2025-09-30 22:24:21.199610', 'step': 9961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.230923', 'step': 9961, 'epoch': 2} {'type': 'loss', 'content': 0.14740602672100067, 'timestamp': '2025-09-30 22:24:21.234445', 'step': 9962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.265673', 'step': 9962, 'epoch': 2} {'type': 'loss', 'content': 0.0814705342054367, 'timestamp': '2025-09-30 22:24:21.272357', 'step': 9963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:21.310037', 'step': 9963, 'epoch': 2} {'type': 'loss', 'content': 0.07794902473688126, 'timestamp': '2025-09-30 22:24:21.334501', 'step': 9964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:21.365587', 'step': 9964, 'epoch': 2} {'type': 'loss', 'content': 0.17285332083702087, 'timestamp': '2025-09-30 22:24:21.375876', 'step': 9965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.407786', 'step': 9965, 'epoch': 2} {'type': 'loss', 'content': 0.06924734264612198, 'timestamp': '2025-09-30 22:24:21.411641', 'step': 9966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:21.443929', 'step': 9966, 'epoch': 2} {'type': 'loss', 'content': 0.08402824401855469, 'timestamp': '2025-09-30 22:24:21.454389', 'step': 9967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.486175', 'step': 9967, 'epoch': 2} {'type': 'loss', 'content': 0.25735002756118774, 'timestamp': '2025-09-30 22:24:21.511847', 'step': 9968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:21.549054', 'step': 9968, 'epoch': 2} {'type': 'loss', 'content': 0.04763568192720413, 'timestamp': '2025-09-30 22:24:21.561451', 'step': 9969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:21.594434', 'step': 9969, 'epoch': 2} {'type': 'loss', 'content': 0.09836279600858688, 'timestamp': '2025-09-30 22:24:21.599324', 'step': 9970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.631429', 'step': 9970, 'epoch': 2} {'type': 'loss', 'content': 0.14738303422927856, 'timestamp': '2025-09-30 22:24:21.636412', 'step': 9971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:21.668773', 'step': 9971, 'epoch': 2} {'type': 'loss', 'content': 0.085185207426548, 'timestamp': '2025-09-30 22:24:21.693769', 'step': 9972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:21.735778', 'step': 9972, 'epoch': 2} {'type': 'loss', 'content': 0.19479647278785706, 'timestamp': '2025-09-30 22:24:21.739744', 'step': 9973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:21.778307', 'step': 9973, 'epoch': 2} {'type': 'loss', 'content': 0.11602171510457993, 'timestamp': '2025-09-30 22:24:21.780722', 'step': 9974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.812998', 'step': 9974, 'epoch': 2} {'type': 'loss', 'content': 0.12776589393615723, 'timestamp': '2025-09-30 22:24:21.816919', 'step': 9975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:21.849060', 'step': 9975, 'epoch': 2} {'type': 'loss', 'content': 0.12081578373908997, 'timestamp': '2025-09-30 22:24:21.875907', 'step': 9976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:21.910925', 'step': 9976, 'epoch': 2} {'type': 'loss', 'content': 0.12299735844135284, 'timestamp': '2025-09-30 22:24:21.927253', 'step': 9977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:21.968571', 'step': 9977, 'epoch': 2} {'type': 'loss', 'content': 0.07549894601106644, 'timestamp': '2025-09-30 22:24:21.973249', 'step': 9978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:22.005209', 'step': 9978, 'epoch': 2} {'type': 'loss', 'content': 0.1253470480442047, 'timestamp': '2025-09-30 22:24:22.010498', 'step': 9979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.043317', 'step': 9979, 'epoch': 2} {'type': 'loss', 'content': 0.09179125726222992, 'timestamp': '2025-09-30 22:24:22.067981', 'step': 9980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.098975', 'step': 9980, 'epoch': 2} {'type': 'loss', 'content': 0.17319488525390625, 'timestamp': '2025-09-30 22:24:22.101260', 'step': 9981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:22.141077', 'step': 9981, 'epoch': 2} {'type': 'loss', 'content': 0.06131491810083389, 'timestamp': '2025-09-30 22:24:22.148245', 'step': 9982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.179907', 'step': 9982, 'epoch': 2} {'type': 'loss', 'content': 0.0730462446808815, 'timestamp': '2025-09-30 22:24:22.182835', 'step': 9983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.215346', 'step': 9983, 'epoch': 2} {'type': 'loss', 'content': 0.08038453012704849, 'timestamp': '2025-09-30 22:24:22.240166', 'step': 9984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:22.271137', 'step': 9984, 'epoch': 2} {'type': 'loss', 'content': 0.12345672398805618, 'timestamp': '2025-09-30 22:24:22.274577', 'step': 9985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.314439', 'step': 9985, 'epoch': 2} {'type': 'loss', 'content': 0.07360376417636871, 'timestamp': '2025-09-30 22:24:22.325148', 'step': 9986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:22.368008', 'step': 9986, 'epoch': 2} {'type': 'loss', 'content': 0.1292784959077835, 'timestamp': '2025-09-30 22:24:22.372338', 'step': 9987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.406144', 'step': 9987, 'epoch': 2} {'type': 'loss', 'content': 0.14963805675506592, 'timestamp': '2025-09-30 22:24:22.432452', 'step': 9988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:22.472565', 'step': 9988, 'epoch': 2} {'type': 'loss', 'content': 0.20504051446914673, 'timestamp': '2025-09-30 22:24:22.475738', 'step': 9989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.506587', 'step': 9989, 'epoch': 2} {'type': 'loss', 'content': 0.15736587345600128, 'timestamp': '2025-09-30 22:24:22.511668', 'step': 9990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:22.546756', 'step': 9990, 'epoch': 2} {'type': 'loss', 'content': 0.14288410544395447, 'timestamp': '2025-09-30 22:24:22.550054', 'step': 9991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.580993', 'step': 9991, 'epoch': 2} {'type': 'loss', 'content': 0.1431429088115692, 'timestamp': '2025-09-30 22:24:22.608135', 'step': 9992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.640115', 'step': 9992, 'epoch': 2} {'type': 'loss', 'content': 0.12479737401008606, 'timestamp': '2025-09-30 22:24:22.649646', 'step': 9993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.680922', 'step': 9993, 'epoch': 2} {'type': 'loss', 'content': 0.09815603494644165, 'timestamp': '2025-09-30 22:24:22.684295', 'step': 9994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.722671', 'step': 9994, 'epoch': 2} {'type': 'loss', 'content': 0.13671369850635529, 'timestamp': '2025-09-30 22:24:22.725952', 'step': 9995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.762722', 'step': 9995, 'epoch': 2} {'type': 'loss', 'content': 0.16911731660366058, 'timestamp': '2025-09-30 22:24:22.789222', 'step': 9996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.825384', 'step': 9996, 'epoch': 2} {'type': 'loss', 'content': 0.055354680866003036, 'timestamp': '2025-09-30 22:24:22.828816', 'step': 9997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.865016', 'step': 9997, 'epoch': 2} {'type': 'loss', 'content': 0.08713250607252121, 'timestamp': '2025-09-30 22:24:22.870281', 'step': 9998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:22.907469', 'step': 9998, 'epoch': 2} {'type': 'loss', 'content': 0.1167915090918541, 'timestamp': '2025-09-30 22:24:22.911045', 'step': 9999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:22.953169', 'step': 9999, 'epoch': 2} {'type': 'loss', 'content': 0.12836337089538574, 'timestamp': '2025-09-30 22:24:22.977522', 'step': 10000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10000', 'timestamp': '2025-09-30 22:24:28.080927', 'step': 10000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:28.113025', 'step': 10000, 'epoch': 2} {'type': 'loss', 'content': 0.14655186235904694, 'timestamp': '2025-09-30 22:24:28.116667', 'step': 10001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:28.157174', 'step': 10001, 'epoch': 2} {'type': 'loss', 'content': 0.08485198020935059, 'timestamp': '2025-09-30 22:24:28.163811', 'step': 10002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:28.196408', 'step': 10002, 'epoch': 2} {'type': 'loss', 'content': 0.10087747871875763, 'timestamp': '2025-09-30 22:24:28.199744', 'step': 10003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:28.230886', 'step': 10003, 'epoch': 2} {'type': 'loss', 'content': 0.13310208916664124, 'timestamp': '2025-09-30 22:24:28.257343', 'step': 10004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:28.289287', 'step': 10004, 'epoch': 2} {'type': 'loss', 'content': 0.12076739966869354, 'timestamp': '2025-09-30 22:24:28.302606', 'step': 10005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:28.338324', 'step': 10005, 'epoch': 2} {'type': 'loss', 'content': 0.12072677910327911, 'timestamp': '2025-09-30 22:24:28.343633', 'step': 10006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:28.385701', 'step': 10006, 'epoch': 2} {'type': 'loss', 'content': 0.13528145849704742, 'timestamp': '2025-09-30 22:24:28.390649', 'step': 10007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:28.430093', 'step': 10007, 'epoch': 2} {'type': 'loss', 'content': 0.21942003071308136, 'timestamp': '2025-09-30 22:24:28.454833', 'step': 10008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:28.485655', 'step': 10008, 'epoch': 2} {'type': 'loss', 'content': 0.10850202292203903, 'timestamp': '2025-09-30 22:24:28.489818', 'step': 10009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:28.522867', 'step': 10009, 'epoch': 2} {'type': 'loss', 'content': 0.08858541399240494, 'timestamp': '2025-09-30 22:24:28.539521', 'step': 10010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:28.575267', 'step': 10010, 'epoch': 2} {'type': 'loss', 'content': 0.1459961235523224, 'timestamp': '2025-09-30 22:24:28.578730', 'step': 10011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:28.610534', 'step': 10011, 'epoch': 2} {'type': 'loss', 'content': 0.06556051224470139, 'timestamp': '2025-09-30 22:24:28.648277', 'step': 10012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:28.686116', 'step': 10012, 'epoch': 2} {'type': 'loss', 'content': 0.13584129512310028, 'timestamp': '2025-09-30 22:24:28.689739', 'step': 10013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:28.722721', 'step': 10013, 'epoch': 2} {'type': 'loss', 'content': 0.12490735948085785, 'timestamp': '2025-09-30 22:24:28.725685', 'step': 10014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:28.763320', 'step': 10014, 'epoch': 2} {'type': 'loss', 'content': 0.11950189620256424, 'timestamp': '2025-09-30 22:24:28.772653', 'step': 10015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:28.817721', 'step': 10015, 'epoch': 2} {'type': 'loss', 'content': 0.05971841141581535, 'timestamp': '2025-09-30 22:24:28.844281', 'step': 10016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:28.885339', 'step': 10016, 'epoch': 2} {'type': 'loss', 'content': 0.05285380035638809, 'timestamp': '2025-09-30 22:24:28.888009', 'step': 10017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:28.930648', 'step': 10017, 'epoch': 2} {'type': 'loss', 'content': 0.09407059848308563, 'timestamp': '2025-09-30 22:24:28.934331', 'step': 10018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:28.968740', 'step': 10018, 'epoch': 2} {'type': 'loss', 'content': 0.04325657710433006, 'timestamp': '2025-09-30 22:24:28.972768', 'step': 10019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:29.009786', 'step': 10019, 'epoch': 2} {'type': 'loss', 'content': 0.13792122900485992, 'timestamp': '2025-09-30 22:24:29.041540', 'step': 10020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:29.079535', 'step': 10020, 'epoch': 2} {'type': 'loss', 'content': 0.07375933974981308, 'timestamp': '2025-09-30 22:24:29.085436', 'step': 10021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:29.141800', 'step': 10021, 'epoch': 2} {'type': 'loss', 'content': 0.02675466053187847, 'timestamp': '2025-09-30 22:24:29.146695', 'step': 10022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:29.180575', 'step': 10022, 'epoch': 2} {'type': 'loss', 'content': 0.08025018125772476, 'timestamp': '2025-09-30 22:24:29.184784', 'step': 10023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:29.216783', 'step': 10023, 'epoch': 2} {'type': 'loss', 'content': 0.03830605745315552, 'timestamp': '2025-09-30 22:24:29.243958', 'step': 10024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:29.282837', 'step': 10024, 'epoch': 2} {'type': 'loss', 'content': 0.10868360847234726, 'timestamp': '2025-09-30 22:24:29.284916', 'step': 10025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:29.332516', 'step': 10025, 'epoch': 2} {'type': 'loss', 'content': 0.08370281010866165, 'timestamp': '2025-09-30 22:24:29.337867', 'step': 10026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:29.378345', 'step': 10026, 'epoch': 2} {'type': 'loss', 'content': 0.10787294805049896, 'timestamp': '2025-09-30 22:24:29.381942', 'step': 10027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:29.424802', 'step': 10027, 'epoch': 2} {'type': 'loss', 'content': 0.12336046248674393, 'timestamp': '2025-09-30 22:24:29.449766', 'step': 10028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:29.488497', 'step': 10028, 'epoch': 2} {'type': 'loss', 'content': 0.08466053009033203, 'timestamp': '2025-09-30 22:24:29.492849', 'step': 10029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:29.531144', 'step': 10029, 'epoch': 2} {'type': 'loss', 'content': 0.10718225687742233, 'timestamp': '2025-09-30 22:24:29.540455', 'step': 10030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:29.585830', 'step': 10030, 'epoch': 2} {'type': 'loss', 'content': 0.156266450881958, 'timestamp': '2025-09-30 22:24:29.589858', 'step': 10031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:29.657956', 'step': 10031, 'epoch': 2} {'type': 'loss', 'content': 0.08089573681354523, 'timestamp': '2025-09-30 22:24:29.684160', 'step': 10032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:29.721569', 'step': 10032, 'epoch': 2} {'type': 'loss', 'content': 0.1022137850522995, 'timestamp': '2025-09-30 22:24:29.724588', 'step': 10033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:29.774571', 'step': 10033, 'epoch': 2} {'type': 'loss', 'content': 0.04950062558054924, 'timestamp': '2025-09-30 22:24:29.780952', 'step': 10034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:29.818771', 'step': 10034, 'epoch': 2} {'type': 'loss', 'content': 0.10486507415771484, 'timestamp': '2025-09-30 22:24:29.834990', 'step': 10035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:29.877176', 'step': 10035, 'epoch': 2} {'type': 'loss', 'content': 0.04995270073413849, 'timestamp': '2025-09-30 22:24:29.904555', 'step': 10036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:29.945096', 'step': 10036, 'epoch': 2} {'type': 'loss', 'content': 0.05565943568944931, 'timestamp': '2025-09-30 22:24:29.953250', 'step': 10037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:29.997599', 'step': 10037, 'epoch': 2} {'type': 'loss', 'content': 0.16141989827156067, 'timestamp': '2025-09-30 22:24:30.002773', 'step': 10038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:30.038543', 'step': 10038, 'epoch': 2} {'type': 'loss', 'content': 0.11787740886211395, 'timestamp': '2025-09-30 22:24:30.041562', 'step': 10039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.074966', 'step': 10039, 'epoch': 2} {'type': 'loss', 'content': 0.07339223474264145, 'timestamp': '2025-09-30 22:24:30.100000', 'step': 10040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.130470', 'step': 10040, 'epoch': 2} {'type': 'loss', 'content': 0.09952656924724579, 'timestamp': '2025-09-30 22:24:30.135803', 'step': 10041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.169338', 'step': 10041, 'epoch': 2} {'type': 'loss', 'content': 0.15064285695552826, 'timestamp': '2025-09-30 22:24:30.172202', 'step': 10042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:30.202872', 'step': 10042, 'epoch': 2} {'type': 'loss', 'content': 0.13965065777301788, 'timestamp': '2025-09-30 22:24:30.214634', 'step': 10043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.245744', 'step': 10043, 'epoch': 2} {'type': 'loss', 'content': 0.11388887465000153, 'timestamp': '2025-09-30 22:24:30.270508', 'step': 10044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:30.301496', 'step': 10044, 'epoch': 2} {'type': 'loss', 'content': 0.10823337733745575, 'timestamp': '2025-09-30 22:24:30.306798', 'step': 10045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:30.338551', 'step': 10045, 'epoch': 2} {'type': 'loss', 'content': 0.1079636737704277, 'timestamp': '2025-09-30 22:24:30.341587', 'step': 10046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.373355', 'step': 10046, 'epoch': 2} {'type': 'loss', 'content': 0.11785658448934555, 'timestamp': '2025-09-30 22:24:30.376145', 'step': 10047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.407267', 'step': 10047, 'epoch': 2} {'type': 'loss', 'content': 0.12551584839820862, 'timestamp': '2025-09-30 22:24:30.439413', 'step': 10048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.478439', 'step': 10048, 'epoch': 2} {'type': 'loss', 'content': 0.07838879525661469, 'timestamp': '2025-09-30 22:24:30.481757', 'step': 10049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.516162', 'step': 10049, 'epoch': 2} {'type': 'loss', 'content': 0.0609067939221859, 'timestamp': '2025-09-30 22:24:30.519112', 'step': 10050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.549437', 'step': 10050, 'epoch': 2} {'type': 'loss', 'content': 0.09702742099761963, 'timestamp': '2025-09-30 22:24:30.553385', 'step': 10051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.585047', 'step': 10051, 'epoch': 2} {'type': 'loss', 'content': 0.06616739183664322, 'timestamp': '2025-09-30 22:24:30.609489', 'step': 10052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.647119', 'step': 10052, 'epoch': 2} {'type': 'loss', 'content': 0.1369854360818863, 'timestamp': '2025-09-30 22:24:30.649703', 'step': 10053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.680469', 'step': 10053, 'epoch': 2} {'type': 'loss', 'content': 0.08538056164979935, 'timestamp': '2025-09-30 22:24:30.684240', 'step': 10054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.715169', 'step': 10054, 'epoch': 2} {'type': 'loss', 'content': 0.0421556793153286, 'timestamp': '2025-09-30 22:24:30.719771', 'step': 10055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.757956', 'step': 10055, 'epoch': 2} {'type': 'loss', 'content': 0.12172184884548187, 'timestamp': '2025-09-30 22:24:30.786996', 'step': 10056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.824654', 'step': 10056, 'epoch': 2} {'type': 'loss', 'content': 0.031035207211971283, 'timestamp': '2025-09-30 22:24:30.836689', 'step': 10057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:30.876618', 'step': 10057, 'epoch': 2} {'type': 'loss', 'content': 0.09128263592720032, 'timestamp': '2025-09-30 22:24:30.882582', 'step': 10058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:30.923534', 'step': 10058, 'epoch': 2} {'type': 'loss', 'content': 0.10469964146614075, 'timestamp': '2025-09-30 22:24:30.930003', 'step': 10059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:30.962535', 'step': 10059, 'epoch': 2} {'type': 'loss', 'content': 0.08557891100645065, 'timestamp': '2025-09-30 22:24:30.990099', 'step': 10060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:31.021821', 'step': 10060, 'epoch': 2} {'type': 'loss', 'content': 0.07819725573062897, 'timestamp': '2025-09-30 22:24:31.034535', 'step': 10061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.075104', 'step': 10061, 'epoch': 2} {'type': 'loss', 'content': 0.0622679628431797, 'timestamp': '2025-09-30 22:24:31.090241', 'step': 10062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:31.132352', 'step': 10062, 'epoch': 2} {'type': 'loss', 'content': 0.06630346179008484, 'timestamp': '2025-09-30 22:24:31.136709', 'step': 10063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.169105', 'step': 10063, 'epoch': 2} {'type': 'loss', 'content': 0.08220531791448593, 'timestamp': '2025-09-30 22:24:31.195319', 'step': 10064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:31.236923', 'step': 10064, 'epoch': 2} {'type': 'loss', 'content': 0.06561817973852158, 'timestamp': '2025-09-30 22:24:31.244670', 'step': 10065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:31.281895', 'step': 10065, 'epoch': 2} {'type': 'loss', 'content': 0.19017915427684784, 'timestamp': '2025-09-30 22:24:31.297444', 'step': 10066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:31.337442', 'step': 10066, 'epoch': 2} {'type': 'loss', 'content': 0.06227768957614899, 'timestamp': '2025-09-30 22:24:31.349793', 'step': 10067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:31.381900', 'step': 10067, 'epoch': 2} {'type': 'loss', 'content': 0.07897069305181503, 'timestamp': '2025-09-30 22:24:31.405796', 'step': 10068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.445597', 'step': 10068, 'epoch': 2} {'type': 'loss', 'content': 0.21417000889778137, 'timestamp': '2025-09-30 22:24:31.450020', 'step': 10069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:31.489689', 'step': 10069, 'epoch': 2} {'type': 'loss', 'content': 0.14101935923099518, 'timestamp': '2025-09-30 22:24:31.494253', 'step': 10070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.527184', 'step': 10070, 'epoch': 2} {'type': 'loss', 'content': 0.07014063745737076, 'timestamp': '2025-09-30 22:24:31.531283', 'step': 10071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.566779', 'step': 10071, 'epoch': 2} {'type': 'loss', 'content': 0.08923019468784332, 'timestamp': '2025-09-30 22:24:31.591494', 'step': 10072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:31.623135', 'step': 10072, 'epoch': 2} {'type': 'loss', 'content': 0.08407066762447357, 'timestamp': '2025-09-30 22:24:31.628659', 'step': 10073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.670097', 'step': 10073, 'epoch': 2} {'type': 'loss', 'content': 0.09793251752853394, 'timestamp': '2025-09-30 22:24:31.680867', 'step': 10074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:31.722698', 'step': 10074, 'epoch': 2} {'type': 'loss', 'content': 0.09057337045669556, 'timestamp': '2025-09-30 22:24:31.735381', 'step': 10075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:31.773041', 'step': 10075, 'epoch': 2} {'type': 'loss', 'content': 0.14500738680362701, 'timestamp': '2025-09-30 22:24:31.802597', 'step': 10076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:31.845324', 'step': 10076, 'epoch': 2} {'type': 'loss', 'content': 0.17797981202602386, 'timestamp': '2025-09-30 22:24:31.854237', 'step': 10077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:31.895160', 'step': 10077, 'epoch': 2} {'type': 'loss', 'content': 0.08862028270959854, 'timestamp': '2025-09-30 22:24:31.898871', 'step': 10078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:31.931689', 'step': 10078, 'epoch': 2} {'type': 'loss', 'content': 0.1736934781074524, 'timestamp': '2025-09-30 22:24:31.935160', 'step': 10079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:31.966061', 'step': 10079, 'epoch': 2} {'type': 'loss', 'content': 0.09363753348588943, 'timestamp': '2025-09-30 22:24:31.991972', 'step': 10080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.025780', 'step': 10080, 'epoch': 2} {'type': 'loss', 'content': 0.06459632515907288, 'timestamp': '2025-09-30 22:24:32.029341', 'step': 10081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.063458', 'step': 10081, 'epoch': 2} {'type': 'loss', 'content': 0.06449612230062485, 'timestamp': '2025-09-30 22:24:32.066536', 'step': 10082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.107034', 'step': 10082, 'epoch': 2} {'type': 'loss', 'content': 0.11172615736722946, 'timestamp': '2025-09-30 22:24:32.111330', 'step': 10083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.148318', 'step': 10083, 'epoch': 2} {'type': 'loss', 'content': 0.1453808695077896, 'timestamp': '2025-09-30 22:24:32.174604', 'step': 10084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.205865', 'step': 10084, 'epoch': 2} {'type': 'loss', 'content': 0.09621058404445648, 'timestamp': '2025-09-30 22:24:32.208943', 'step': 10085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:32.244958', 'step': 10085, 'epoch': 2} {'type': 'loss', 'content': 0.08669625222682953, 'timestamp': '2025-09-30 22:24:32.254228', 'step': 10086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.288963', 'step': 10086, 'epoch': 2} {'type': 'loss', 'content': 0.09746094793081284, 'timestamp': '2025-09-30 22:24:32.292048', 'step': 10087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:32.323596', 'step': 10087, 'epoch': 2} {'type': 'loss', 'content': 0.0585591197013855, 'timestamp': '2025-09-30 22:24:32.348046', 'step': 10088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:32.381279', 'step': 10088, 'epoch': 2} {'type': 'loss', 'content': 0.09322819113731384, 'timestamp': '2025-09-30 22:24:32.390282', 'step': 10089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.428392', 'step': 10089, 'epoch': 2} {'type': 'loss', 'content': 0.05019785091280937, 'timestamp': '2025-09-30 22:24:32.432481', 'step': 10090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.468702', 'step': 10090, 'epoch': 2} {'type': 'loss', 'content': 0.14778399467468262, 'timestamp': '2025-09-30 22:24:32.478070', 'step': 10091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.510591', 'step': 10091, 'epoch': 2} {'type': 'loss', 'content': 0.09132184833288193, 'timestamp': '2025-09-30 22:24:32.537229', 'step': 10092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:32.572764', 'step': 10092, 'epoch': 2} {'type': 'loss', 'content': 0.10934454947710037, 'timestamp': '2025-09-30 22:24:32.581494', 'step': 10093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.615049', 'step': 10093, 'epoch': 2} {'type': 'loss', 'content': 0.1139233335852623, 'timestamp': '2025-09-30 22:24:32.621341', 'step': 10094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:32.655342', 'step': 10094, 'epoch': 2} {'type': 'loss', 'content': 0.08672229945659637, 'timestamp': '2025-09-30 22:24:32.663858', 'step': 10095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:32.701089', 'step': 10095, 'epoch': 2} {'type': 'loss', 'content': 0.11177297681570053, 'timestamp': '2025-09-30 22:24:32.727154', 'step': 10096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.761163', 'step': 10096, 'epoch': 2} {'type': 'loss', 'content': 0.07095874100923538, 'timestamp': '2025-09-30 22:24:32.765267', 'step': 10097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.798987', 'step': 10097, 'epoch': 2} {'type': 'loss', 'content': 0.08652639389038086, 'timestamp': '2025-09-30 22:24:32.802198', 'step': 10098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:32.838346', 'step': 10098, 'epoch': 2} {'type': 'loss', 'content': 0.12470532953739166, 'timestamp': '2025-09-30 22:24:32.842091', 'step': 10099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:32.873611', 'step': 10099, 'epoch': 2} {'type': 'loss', 'content': 0.11651425063610077, 'timestamp': '2025-09-30 22:24:32.900436', 'step': 10100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.931247', 'step': 10100, 'epoch': 2} {'type': 'loss', 'content': 0.19392122328281403, 'timestamp': '2025-09-30 22:24:32.937280', 'step': 10101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:32.970893', 'step': 10101, 'epoch': 2} {'type': 'loss', 'content': 0.09701879322528839, 'timestamp': '2025-09-30 22:24:32.975212', 'step': 10102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:33.009391', 'step': 10102, 'epoch': 2} {'type': 'loss', 'content': 0.08200474828481674, 'timestamp': '2025-09-30 22:24:33.014359', 'step': 10103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:33.047788', 'step': 10103, 'epoch': 2} {'type': 'loss', 'content': 0.06755732744932175, 'timestamp': '2025-09-30 22:24:33.072796', 'step': 10104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:33.104430', 'step': 10104, 'epoch': 2} {'type': 'loss', 'content': 0.0837993249297142, 'timestamp': '2025-09-30 22:24:33.108757', 'step': 10105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:33.142164', 'step': 10105, 'epoch': 2} {'type': 'loss', 'content': 0.09480007737874985, 'timestamp': '2025-09-30 22:24:33.146850', 'step': 10106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:33.181625', 'step': 10106, 'epoch': 2} {'type': 'loss', 'content': 0.0931878387928009, 'timestamp': '2025-09-30 22:24:33.184928', 'step': 10107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:33.221102', 'step': 10107, 'epoch': 2} {'type': 'loss', 'content': 0.06048243120312691, 'timestamp': '2025-09-30 22:24:33.246476', 'step': 10108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.289668', 'step': 10108, 'epoch': 2} {'type': 'loss', 'content': 0.09732294082641602, 'timestamp': '2025-09-30 22:24:33.293153', 'step': 10109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.325146', 'step': 10109, 'epoch': 2} {'type': 'loss', 'content': 0.1363295316696167, 'timestamp': '2025-09-30 22:24:33.329411', 'step': 10110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.360992', 'step': 10110, 'epoch': 2} {'type': 'loss', 'content': 0.1409766972064972, 'timestamp': '2025-09-30 22:24:33.364984', 'step': 10111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:33.399914', 'step': 10111, 'epoch': 2} {'type': 'loss', 'content': 0.060845714062452316, 'timestamp': '2025-09-30 22:24:33.425725', 'step': 10112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:33.458096', 'step': 10112, 'epoch': 2} {'type': 'loss', 'content': 0.114528588950634, 'timestamp': '2025-09-30 22:24:33.463737', 'step': 10113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.501321', 'step': 10113, 'epoch': 2} {'type': 'loss', 'content': 0.043575163930654526, 'timestamp': '2025-09-30 22:24:33.505814', 'step': 10114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:33.538436', 'step': 10114, 'epoch': 2} {'type': 'loss', 'content': 0.09016270190477371, 'timestamp': '2025-09-30 22:24:33.544261', 'step': 10115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.582589', 'step': 10115, 'epoch': 2} {'type': 'loss', 'content': 0.07109890133142471, 'timestamp': '2025-09-30 22:24:33.612197', 'step': 10116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:33.644401', 'step': 10116, 'epoch': 2} {'type': 'loss', 'content': 0.08232548087835312, 'timestamp': '2025-09-30 22:24:33.649876', 'step': 10117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:33.682261', 'step': 10117, 'epoch': 2} {'type': 'loss', 'content': 0.056090231984853745, 'timestamp': '2025-09-30 22:24:33.686517', 'step': 10118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.720171', 'step': 10118, 'epoch': 2} {'type': 'loss', 'content': 0.06522433459758759, 'timestamp': '2025-09-30 22:24:33.723513', 'step': 10119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:33.755487', 'step': 10119, 'epoch': 2} {'type': 'loss', 'content': 0.17258015275001526, 'timestamp': '2025-09-30 22:24:33.780789', 'step': 10120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:33.823033', 'step': 10120, 'epoch': 2} {'type': 'loss', 'content': 0.19177551567554474, 'timestamp': '2025-09-30 22:24:33.834214', 'step': 10121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.878788', 'step': 10121, 'epoch': 2} {'type': 'loss', 'content': 0.08410130441188812, 'timestamp': '2025-09-30 22:24:33.894535', 'step': 10122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:33.932410', 'step': 10122, 'epoch': 2} {'type': 'loss', 'content': 0.08682991564273834, 'timestamp': '2025-09-30 22:24:33.936683', 'step': 10123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:33.974802', 'step': 10123, 'epoch': 2} {'type': 'loss', 'content': 0.09671139717102051, 'timestamp': '2025-09-30 22:24:33.999954', 'step': 10124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:34.041691', 'step': 10124, 'epoch': 2} {'type': 'loss', 'content': 0.13447414338588715, 'timestamp': '2025-09-30 22:24:34.047035', 'step': 10125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:34.081049', 'step': 10125, 'epoch': 2} {'type': 'loss', 'content': 0.06800653040409088, 'timestamp': '2025-09-30 22:24:34.088831', 'step': 10126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:34.124036', 'step': 10126, 'epoch': 2} {'type': 'loss', 'content': 0.155930295586586, 'timestamp': '2025-09-30 22:24:34.133844', 'step': 10127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:34.167419', 'step': 10127, 'epoch': 2} {'type': 'loss', 'content': 0.12184368818998337, 'timestamp': '2025-09-30 22:24:34.192472', 'step': 10128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:34.229927', 'step': 10128, 'epoch': 2} {'type': 'loss', 'content': 0.12229910492897034, 'timestamp': '2025-09-30 22:24:34.234215', 'step': 10129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:34.267475', 'step': 10129, 'epoch': 2} {'type': 'loss', 'content': 0.09055820852518082, 'timestamp': '2025-09-30 22:24:34.272031', 'step': 10130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:34.305348', 'step': 10130, 'epoch': 2} {'type': 'loss', 'content': 0.07649930566549301, 'timestamp': '2025-09-30 22:24:34.308011', 'step': 10131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:34.341874', 'step': 10131, 'epoch': 2} {'type': 'loss', 'content': 0.06830915063619614, 'timestamp': '2025-09-30 22:24:34.371418', 'step': 10132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:34.412952', 'step': 10132, 'epoch': 2} {'type': 'loss', 'content': 0.0928204283118248, 'timestamp': '2025-09-30 22:24:34.415772', 'step': 10133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:34.452262', 'step': 10133, 'epoch': 2} {'type': 'loss', 'content': 0.08706485480070114, 'timestamp': '2025-09-30 22:24:34.457733', 'step': 10134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:34.499103', 'step': 10134, 'epoch': 2} {'type': 'loss', 'content': 0.12836770713329315, 'timestamp': '2025-09-30 22:24:34.502741', 'step': 10135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:24:34.543705', 'step': 10135, 'epoch': 2} {'type': 'loss', 'content': 0.07363598048686981, 'timestamp': '2025-09-30 22:24:34.570236', 'step': 10136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:34.613322', 'step': 10136, 'epoch': 2} {'type': 'loss', 'content': 0.10284873843193054, 'timestamp': '2025-09-30 22:24:34.617628', 'step': 10137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:34.660422', 'step': 10137, 'epoch': 2} {'type': 'loss', 'content': 0.09333052486181259, 'timestamp': '2025-09-30 22:24:34.665690', 'step': 10138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:34.696227', 'step': 10138, 'epoch': 2} {'type': 'loss', 'content': 0.05577638000249863, 'timestamp': '2025-09-30 22:24:34.702229', 'step': 10139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:34.751265', 'step': 10139, 'epoch': 2} {'type': 'loss', 'content': 0.12725545465946198, 'timestamp': '2025-09-30 22:24:34.779722', 'step': 10140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:34.813907', 'step': 10140, 'epoch': 2} {'type': 'loss', 'content': 0.04626971855759621, 'timestamp': '2025-09-30 22:24:34.817717', 'step': 10141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:34.848394', 'step': 10141, 'epoch': 2} {'type': 'loss', 'content': 0.10584226250648499, 'timestamp': '2025-09-30 22:24:34.853030', 'step': 10142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:34.886717', 'step': 10142, 'epoch': 2} {'type': 'loss', 'content': 0.10998203605413437, 'timestamp': '2025-09-30 22:24:34.891436', 'step': 10143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:34.924778', 'step': 10143, 'epoch': 2} {'type': 'loss', 'content': 0.12076626718044281, 'timestamp': '2025-09-30 22:24:34.948891', 'step': 10144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:34.981911', 'step': 10144, 'epoch': 2} {'type': 'loss', 'content': 0.03507901728153229, 'timestamp': '2025-09-30 22:24:34.987939', 'step': 10145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.025277', 'step': 10145, 'epoch': 2} {'type': 'loss', 'content': 0.08809080719947815, 'timestamp': '2025-09-30 22:24:35.028294', 'step': 10146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.059186', 'step': 10146, 'epoch': 2} {'type': 'loss', 'content': 0.09988649934530258, 'timestamp': '2025-09-30 22:24:35.062823', 'step': 10147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.094613', 'step': 10147, 'epoch': 2} {'type': 'loss', 'content': 0.12727196514606476, 'timestamp': '2025-09-30 22:24:35.119867', 'step': 10148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.151147', 'step': 10148, 'epoch': 2} {'type': 'loss', 'content': 0.07808095961809158, 'timestamp': '2025-09-30 22:24:35.155746', 'step': 10149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.187614', 'step': 10149, 'epoch': 2} {'type': 'loss', 'content': 0.11953112483024597, 'timestamp': '2025-09-30 22:24:35.190787', 'step': 10150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.223381', 'step': 10150, 'epoch': 2} {'type': 'loss', 'content': 0.13897927105426788, 'timestamp': '2025-09-30 22:24:35.232404', 'step': 10151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.263755', 'step': 10151, 'epoch': 2} {'type': 'loss', 'content': 0.15953780710697174, 'timestamp': '2025-09-30 22:24:35.293024', 'step': 10152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.324318', 'step': 10152, 'epoch': 2} {'type': 'loss', 'content': 0.043159667402505875, 'timestamp': '2025-09-30 22:24:35.329410', 'step': 10153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:35.361159', 'step': 10153, 'epoch': 2} {'type': 'loss', 'content': 0.10986075550317764, 'timestamp': '2025-09-30 22:24:35.365002', 'step': 10154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.395337', 'step': 10154, 'epoch': 2} {'type': 'loss', 'content': 0.09263505786657333, 'timestamp': '2025-09-30 22:24:35.398042', 'step': 10155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.430706', 'step': 10155, 'epoch': 2} {'type': 'loss', 'content': 0.09862227737903595, 'timestamp': '2025-09-30 22:24:35.454820', 'step': 10156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.486054', 'step': 10156, 'epoch': 2} {'type': 'loss', 'content': 0.11353111267089844, 'timestamp': '2025-09-30 22:24:35.490225', 'step': 10157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:35.522591', 'step': 10157, 'epoch': 2} {'type': 'loss', 'content': 0.11383572220802307, 'timestamp': '2025-09-30 22:24:35.525423', 'step': 10158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:35.561795', 'step': 10158, 'epoch': 2} {'type': 'loss', 'content': 0.13305306434631348, 'timestamp': '2025-09-30 22:24:35.565806', 'step': 10159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.611023', 'step': 10159, 'epoch': 2} {'type': 'loss', 'content': 0.0712064877152443, 'timestamp': '2025-09-30 22:24:35.644854', 'step': 10160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:35.676116', 'step': 10160, 'epoch': 2} {'type': 'loss', 'content': 0.05161924287676811, 'timestamp': '2025-09-30 22:24:35.679320', 'step': 10161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:35.709599', 'step': 10161, 'epoch': 2} {'type': 'loss', 'content': 0.08918443322181702, 'timestamp': '2025-09-30 22:24:35.712451', 'step': 10162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.747604', 'step': 10162, 'epoch': 2} {'type': 'loss', 'content': 0.17852717638015747, 'timestamp': '2025-09-30 22:24:35.750718', 'step': 10163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:35.786958', 'step': 10163, 'epoch': 2} {'type': 'loss', 'content': 0.11382195353507996, 'timestamp': '2025-09-30 22:24:35.812192', 'step': 10164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:35.854276', 'step': 10164, 'epoch': 2} {'type': 'loss', 'content': 0.1583484411239624, 'timestamp': '2025-09-30 22:24:35.862021', 'step': 10165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.892162', 'step': 10165, 'epoch': 2} {'type': 'loss', 'content': 0.10866238921880722, 'timestamp': '2025-09-30 22:24:35.895029', 'step': 10166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:35.942855', 'step': 10166, 'epoch': 2} {'type': 'loss', 'content': 0.18204335868358612, 'timestamp': '2025-09-30 22:24:35.946122', 'step': 10167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:35.977198', 'step': 10167, 'epoch': 2} {'type': 'loss', 'content': 0.10516224056482315, 'timestamp': '2025-09-30 22:24:36.002155', 'step': 10168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:36.032284', 'step': 10168, 'epoch': 2} {'type': 'loss', 'content': 0.1128825694322586, 'timestamp': '2025-09-30 22:24:36.034661', 'step': 10169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:36.065087', 'step': 10169, 'epoch': 2} {'type': 'loss', 'content': 0.22920215129852295, 'timestamp': '2025-09-30 22:24:36.068270', 'step': 10170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.102257', 'step': 10170, 'epoch': 2} {'type': 'loss', 'content': 0.15075455605983734, 'timestamp': '2025-09-30 22:24:36.105266', 'step': 10171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:36.135803', 'step': 10171, 'epoch': 2} {'type': 'loss', 'content': 0.13351701200008392, 'timestamp': '2025-09-30 22:24:36.160860', 'step': 10172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:36.195342', 'step': 10172, 'epoch': 2} {'type': 'loss', 'content': 0.15811647474765778, 'timestamp': '2025-09-30 22:24:36.198769', 'step': 10173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:36.229736', 'step': 10173, 'epoch': 2} {'type': 'loss', 'content': 0.056312885135412216, 'timestamp': '2025-09-30 22:24:36.233439', 'step': 10174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:36.265675', 'step': 10174, 'epoch': 2} {'type': 'loss', 'content': 0.15531934797763824, 'timestamp': '2025-09-30 22:24:36.269659', 'step': 10175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:36.302640', 'step': 10175, 'epoch': 2} {'type': 'loss', 'content': 0.11861313879489899, 'timestamp': '2025-09-30 22:24:36.327901', 'step': 10176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.360889', 'step': 10176, 'epoch': 2} {'type': 'loss', 'content': 0.11302194744348526, 'timestamp': '2025-09-30 22:24:36.364726', 'step': 10177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:36.403556', 'step': 10177, 'epoch': 2} {'type': 'loss', 'content': 0.16148501634597778, 'timestamp': '2025-09-30 22:24:36.406536', 'step': 10178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:36.438530', 'step': 10178, 'epoch': 2} {'type': 'loss', 'content': 0.06748361885547638, 'timestamp': '2025-09-30 22:24:36.443234', 'step': 10179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.483852', 'step': 10179, 'epoch': 2} {'type': 'loss', 'content': 0.25509971380233765, 'timestamp': '2025-09-30 22:24:36.508074', 'step': 10180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:36.540256', 'step': 10180, 'epoch': 2} {'type': 'loss', 'content': 0.14378637075424194, 'timestamp': '2025-09-30 22:24:36.547648', 'step': 10181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.579188', 'step': 10181, 'epoch': 2} {'type': 'loss', 'content': 0.12861691415309906, 'timestamp': '2025-09-30 22:24:36.583946', 'step': 10182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:36.617807', 'step': 10182, 'epoch': 2} {'type': 'loss', 'content': 0.13071410357952118, 'timestamp': '2025-09-30 22:24:36.631295', 'step': 10183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:36.673408', 'step': 10183, 'epoch': 2} {'type': 'loss', 'content': 0.08019974082708359, 'timestamp': '2025-09-30 22:24:36.711851', 'step': 10184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:36.743924', 'step': 10184, 'epoch': 2} {'type': 'loss', 'content': 0.17642059922218323, 'timestamp': '2025-09-30 22:24:36.753000', 'step': 10185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.789328', 'step': 10185, 'epoch': 2} {'type': 'loss', 'content': 0.0769641250371933, 'timestamp': '2025-09-30 22:24:36.792990', 'step': 10186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:36.823953', 'step': 10186, 'epoch': 2} {'type': 'loss', 'content': 0.14113184809684753, 'timestamp': '2025-09-30 22:24:36.827554', 'step': 10187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.861277', 'step': 10187, 'epoch': 2} {'type': 'loss', 'content': 0.14310508966445923, 'timestamp': '2025-09-30 22:24:36.892335', 'step': 10188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:36.941222', 'step': 10188, 'epoch': 2} {'type': 'loss', 'content': 0.10065454244613647, 'timestamp': '2025-09-30 22:24:36.944619', 'step': 10189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:36.977726', 'step': 10189, 'epoch': 2} {'type': 'loss', 'content': 0.07524445652961731, 'timestamp': '2025-09-30 22:24:36.981423', 'step': 10190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:37.013016', 'step': 10190, 'epoch': 2} {'type': 'loss', 'content': 0.24441513419151306, 'timestamp': '2025-09-30 22:24:37.021063', 'step': 10191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:37.054519', 'step': 10191, 'epoch': 2} {'type': 'loss', 'content': 0.10028912872076035, 'timestamp': '2025-09-30 22:24:37.090966', 'step': 10192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:37.129192', 'step': 10192, 'epoch': 2} {'type': 'loss', 'content': 0.08765587210655212, 'timestamp': '2025-09-30 22:24:37.133915', 'step': 10193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:37.174634', 'step': 10193, 'epoch': 2} {'type': 'loss', 'content': 0.12901386618614197, 'timestamp': '2025-09-30 22:24:37.178516', 'step': 10194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:37.215109', 'step': 10194, 'epoch': 2} {'type': 'loss', 'content': 0.15736596286296844, 'timestamp': '2025-09-30 22:24:37.218905', 'step': 10195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:37.256734', 'step': 10195, 'epoch': 2} {'type': 'loss', 'content': 0.1533714085817337, 'timestamp': '2025-09-30 22:24:37.282117', 'step': 10196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:37.315207', 'step': 10196, 'epoch': 2} {'type': 'loss', 'content': 0.050272002816200256, 'timestamp': '2025-09-30 22:24:37.319577', 'step': 10197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:37.355726', 'step': 10197, 'epoch': 2} {'type': 'loss', 'content': 0.12819409370422363, 'timestamp': '2025-09-30 22:24:37.359381', 'step': 10198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:37.391509', 'step': 10198, 'epoch': 2} {'type': 'loss', 'content': 0.09500426799058914, 'timestamp': '2025-09-30 22:24:37.400350', 'step': 10199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:37.434372', 'step': 10199, 'epoch': 2} {'type': 'loss', 'content': 0.1495373249053955, 'timestamp': '2025-09-30 22:24:37.460521', 'step': 10200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:37.491140', 'step': 10200, 'epoch': 2} {'type': 'loss', 'content': 0.15184372663497925, 'timestamp': '2025-09-30 22:24:37.505008', 'step': 10201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:37.544699', 'step': 10201, 'epoch': 2} {'type': 'loss', 'content': 0.06312330812215805, 'timestamp': '2025-09-30 22:24:37.547811', 'step': 10202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:37.578722', 'step': 10202, 'epoch': 2} {'type': 'loss', 'content': 0.11566745489835739, 'timestamp': '2025-09-30 22:24:37.588903', 'step': 10203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:37.626163', 'step': 10203, 'epoch': 2} {'type': 'loss', 'content': 0.08955959975719452, 'timestamp': '2025-09-30 22:24:37.650851', 'step': 10204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:37.684750', 'step': 10204, 'epoch': 2} {'type': 'loss', 'content': 0.13108286261558533, 'timestamp': '2025-09-30 22:24:37.687905', 'step': 10205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:37.718568', 'step': 10205, 'epoch': 2} {'type': 'loss', 'content': 0.07034789025783539, 'timestamp': '2025-09-30 22:24:37.724241', 'step': 10206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:37.757641', 'step': 10206, 'epoch': 2} {'type': 'loss', 'content': 0.09642244130373001, 'timestamp': '2025-09-30 22:24:37.763677', 'step': 10207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:37.798611', 'step': 10207, 'epoch': 2} {'type': 'loss', 'content': 0.10880079120397568, 'timestamp': '2025-09-30 22:24:37.824932', 'step': 10208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:24:37.867599', 'step': 10208, 'epoch': 2} {'type': 'loss', 'content': 0.11343943327665329, 'timestamp': '2025-09-30 22:24:37.872356', 'step': 10209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:37.913761', 'step': 10209, 'epoch': 2} {'type': 'loss', 'content': 0.10045742988586426, 'timestamp': '2025-09-30 22:24:37.918813', 'step': 10210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:37.951160', 'step': 10210, 'epoch': 2} {'type': 'loss', 'content': 0.08918482810258865, 'timestamp': '2025-09-30 22:24:37.954117', 'step': 10211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:37.987285', 'step': 10211, 'epoch': 2} {'type': 'loss', 'content': 0.06520155072212219, 'timestamp': '2025-09-30 22:24:38.014209', 'step': 10212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:38.046004', 'step': 10212, 'epoch': 2} {'type': 'loss', 'content': 0.07503224909305573, 'timestamp': '2025-09-30 22:24:38.057321', 'step': 10213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.097675', 'step': 10213, 'epoch': 2} {'type': 'loss', 'content': 0.1937161087989807, 'timestamp': '2025-09-30 22:24:38.106685', 'step': 10214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:38.143719', 'step': 10214, 'epoch': 2} {'type': 'loss', 'content': 0.1518520712852478, 'timestamp': '2025-09-30 22:24:38.147840', 'step': 10215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:38.184184', 'step': 10215, 'epoch': 2} {'type': 'loss', 'content': 0.08839605748653412, 'timestamp': '2025-09-30 22:24:38.211304', 'step': 10216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:38.242434', 'step': 10216, 'epoch': 2} {'type': 'loss', 'content': 0.1087745800614357, 'timestamp': '2025-09-30 22:24:38.250544', 'step': 10217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.285048', 'step': 10217, 'epoch': 2} {'type': 'loss', 'content': 0.16649141907691956, 'timestamp': '2025-09-30 22:24:38.301605', 'step': 10218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:38.341016', 'step': 10218, 'epoch': 2} {'type': 'loss', 'content': 0.10560379922389984, 'timestamp': '2025-09-30 22:24:38.346359', 'step': 10219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:38.377264', 'step': 10219, 'epoch': 2} {'type': 'loss', 'content': 0.05924474075436592, 'timestamp': '2025-09-30 22:24:38.403369', 'step': 10220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:38.446479', 'step': 10220, 'epoch': 2} {'type': 'loss', 'content': 0.14848311245441437, 'timestamp': '2025-09-30 22:24:38.463528', 'step': 10221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.495802', 'step': 10221, 'epoch': 2} {'type': 'loss', 'content': 0.08093886077404022, 'timestamp': '2025-09-30 22:24:38.501482', 'step': 10222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:38.532386', 'step': 10222, 'epoch': 2} {'type': 'loss', 'content': 0.07420144230127335, 'timestamp': '2025-09-30 22:24:38.547058', 'step': 10223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:38.588578', 'step': 10223, 'epoch': 2} {'type': 'loss', 'content': 0.14195893704891205, 'timestamp': '2025-09-30 22:24:38.613218', 'step': 10224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.645643', 'step': 10224, 'epoch': 2} {'type': 'loss', 'content': 0.1404140591621399, 'timestamp': '2025-09-30 22:24:38.655265', 'step': 10225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:38.693839', 'step': 10225, 'epoch': 2} {'type': 'loss', 'content': 0.15998324751853943, 'timestamp': '2025-09-30 22:24:38.696719', 'step': 10226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:38.737681', 'step': 10226, 'epoch': 2} {'type': 'loss', 'content': 0.1359911859035492, 'timestamp': '2025-09-30 22:24:38.740748', 'step': 10227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.774809', 'step': 10227, 'epoch': 2} {'type': 'loss', 'content': 0.07334078848361969, 'timestamp': '2025-09-30 22:24:38.798948', 'step': 10228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.832403', 'step': 10228, 'epoch': 2} {'type': 'loss', 'content': 0.10799868404865265, 'timestamp': '2025-09-30 22:24:38.835606', 'step': 10229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:38.866408', 'step': 10229, 'epoch': 2} {'type': 'loss', 'content': 0.17183437943458557, 'timestamp': '2025-09-30 22:24:38.871257', 'step': 10230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.901763', 'step': 10230, 'epoch': 2} {'type': 'loss', 'content': 0.06869535893201828, 'timestamp': '2025-09-30 22:24:38.904607', 'step': 10231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:38.935441', 'step': 10231, 'epoch': 2} {'type': 'loss', 'content': 0.1916481852531433, 'timestamp': '2025-09-30 22:24:38.960076', 'step': 10232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:38.995049', 'step': 10232, 'epoch': 2} {'type': 'loss', 'content': 0.05328700318932533, 'timestamp': '2025-09-30 22:24:39.002775', 'step': 10233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:39.033419', 'step': 10233, 'epoch': 2} {'type': 'loss', 'content': 0.1682479828596115, 'timestamp': '2025-09-30 22:24:39.036083', 'step': 10234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:39.066644', 'step': 10234, 'epoch': 2} {'type': 'loss', 'content': 0.1274663209915161, 'timestamp': '2025-09-30 22:24:39.070416', 'step': 10235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:39.103804', 'step': 10235, 'epoch': 2} {'type': 'loss', 'content': 0.09116719663143158, 'timestamp': '2025-09-30 22:24:39.130514', 'step': 10236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:39.161663', 'step': 10236, 'epoch': 2} {'type': 'loss', 'content': 0.1362461894750595, 'timestamp': '2025-09-30 22:24:39.166233', 'step': 10237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.201299', 'step': 10237, 'epoch': 2} {'type': 'loss', 'content': 0.11237329244613647, 'timestamp': '2025-09-30 22:24:39.204401', 'step': 10238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.238661', 'step': 10238, 'epoch': 2} {'type': 'loss', 'content': 0.07747042924165726, 'timestamp': '2025-09-30 22:24:39.250166', 'step': 10239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.292264', 'step': 10239, 'epoch': 2} {'type': 'loss', 'content': 0.17231062054634094, 'timestamp': '2025-09-30 22:24:39.318996', 'step': 10240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:39.350278', 'step': 10240, 'epoch': 2} {'type': 'loss', 'content': 0.09209306538105011, 'timestamp': '2025-09-30 22:24:39.361114', 'step': 10241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:39.401639', 'step': 10241, 'epoch': 2} {'type': 'loss', 'content': 0.14706771075725555, 'timestamp': '2025-09-30 22:24:39.405444', 'step': 10242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:39.441162', 'step': 10242, 'epoch': 2} {'type': 'loss', 'content': 0.15836754441261292, 'timestamp': '2025-09-30 22:24:39.443994', 'step': 10243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:39.480747', 'step': 10243, 'epoch': 2} {'type': 'loss', 'content': 0.14846602082252502, 'timestamp': '2025-09-30 22:24:39.505593', 'step': 10244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.537068', 'step': 10244, 'epoch': 2} {'type': 'loss', 'content': 0.06623084843158722, 'timestamp': '2025-09-30 22:24:39.543514', 'step': 10245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.578541', 'step': 10245, 'epoch': 2} {'type': 'loss', 'content': 0.19761382043361664, 'timestamp': '2025-09-30 22:24:39.581122', 'step': 10246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.614949', 'step': 10246, 'epoch': 2} {'type': 'loss', 'content': 0.1187993511557579, 'timestamp': '2025-09-30 22:24:39.620322', 'step': 10247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.656594', 'step': 10247, 'epoch': 2} {'type': 'loss', 'content': 0.10092868655920029, 'timestamp': '2025-09-30 22:24:39.685884', 'step': 10248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:39.719707', 'step': 10248, 'epoch': 2} {'type': 'loss', 'content': 0.09216655045747757, 'timestamp': '2025-09-30 22:24:39.722115', 'step': 10249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.754633', 'step': 10249, 'epoch': 2} {'type': 'loss', 'content': 0.15825137495994568, 'timestamp': '2025-09-30 22:24:39.763529', 'step': 10250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:39.796807', 'step': 10250, 'epoch': 2} {'type': 'loss', 'content': 0.08743276447057724, 'timestamp': '2025-09-30 22:24:39.800209', 'step': 10251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:39.842773', 'step': 10251, 'epoch': 2} {'type': 'loss', 'content': 0.2761270999908447, 'timestamp': '2025-09-30 22:24:39.871212', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:24:47.774452', 'step': 10252, 'epoch': 2} {'type': 'pplx', 'content': 13746.215729965046, 'timestamp': '2025-09-30 22:24:47.780521', 'step': 10252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:47.811626', 'step': 10252, 'epoch': 2} {'type': 'loss', 'content': 0.1268032044172287, 'timestamp': '2025-09-30 22:24:47.821533', 'step': 10253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:47.854280', 'step': 10253, 'epoch': 2} {'type': 'loss', 'content': 0.1339886486530304, 'timestamp': '2025-09-30 22:24:47.857597', 'step': 10254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:47.888848', 'step': 10254, 'epoch': 2} {'type': 'loss', 'content': 0.08312595635652542, 'timestamp': '2025-09-30 22:24:47.892991', 'step': 10255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:47.923559', 'step': 10255, 'epoch': 2} {'type': 'loss', 'content': 0.08806083351373672, 'timestamp': '2025-09-30 22:24:47.957014', 'step': 10256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:47.987931', 'step': 10256, 'epoch': 2} {'type': 'loss', 'content': 0.10819365084171295, 'timestamp': '2025-09-30 22:24:47.991372', 'step': 10257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:48.024167', 'step': 10257, 'epoch': 2} {'type': 'loss', 'content': 0.13397465646266937, 'timestamp': '2025-09-30 22:24:48.028560', 'step': 10258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:48.060321', 'step': 10258, 'epoch': 2} {'type': 'loss', 'content': 0.03399050608277321, 'timestamp': '2025-09-30 22:24:48.064354', 'step': 10259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.097500', 'step': 10259, 'epoch': 2} {'type': 'loss', 'content': 0.08965663611888885, 'timestamp': '2025-09-30 22:24:48.122345', 'step': 10260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:48.157318', 'step': 10260, 'epoch': 2} {'type': 'loss', 'content': 0.04475581645965576, 'timestamp': '2025-09-30 22:24:48.171826', 'step': 10261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.207776', 'step': 10261, 'epoch': 2} {'type': 'loss', 'content': 0.14655651152133942, 'timestamp': '2025-09-30 22:24:48.212491', 'step': 10262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:48.245292', 'step': 10262, 'epoch': 2} {'type': 'loss', 'content': 0.05429629981517792, 'timestamp': '2025-09-30 22:24:48.256015', 'step': 10263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.290984', 'step': 10263, 'epoch': 2} {'type': 'loss', 'content': 0.12793579697608948, 'timestamp': '2025-09-30 22:24:48.315801', 'step': 10264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:48.347191', 'step': 10264, 'epoch': 2} {'type': 'loss', 'content': 0.1456846296787262, 'timestamp': '2025-09-30 22:24:48.352621', 'step': 10265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.387762', 'step': 10265, 'epoch': 2} {'type': 'loss', 'content': 0.08618248254060745, 'timestamp': '2025-09-30 22:24:48.391772', 'step': 10266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:48.423167', 'step': 10266, 'epoch': 2} {'type': 'loss', 'content': 0.17408038675785065, 'timestamp': '2025-09-30 22:24:48.426232', 'step': 10267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.470861', 'step': 10267, 'epoch': 2} {'type': 'loss', 'content': 0.16080105304718018, 'timestamp': '2025-09-30 22:24:48.501814', 'step': 10268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:48.534641', 'step': 10268, 'epoch': 2} {'type': 'loss', 'content': 0.15735875070095062, 'timestamp': '2025-09-30 22:24:48.539643', 'step': 10269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:48.575823', 'step': 10269, 'epoch': 2} {'type': 'loss', 'content': 0.13952316343784332, 'timestamp': '2025-09-30 22:24:48.580385', 'step': 10270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:48.612304', 'step': 10270, 'epoch': 2} {'type': 'loss', 'content': 0.07375939190387726, 'timestamp': '2025-09-30 22:24:48.622038', 'step': 10271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.658616', 'step': 10271, 'epoch': 2} {'type': 'loss', 'content': 0.03908609598875046, 'timestamp': '2025-09-30 22:24:48.689446', 'step': 10272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:48.731803', 'step': 10272, 'epoch': 2} {'type': 'loss', 'content': 0.10557352751493454, 'timestamp': '2025-09-30 22:24:48.736866', 'step': 10273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:48.777018', 'step': 10273, 'epoch': 2} {'type': 'loss', 'content': 0.08097214251756668, 'timestamp': '2025-09-30 22:24:48.781646', 'step': 10274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:48.816028', 'step': 10274, 'epoch': 2} {'type': 'loss', 'content': 0.10317378491163254, 'timestamp': '2025-09-30 22:24:48.819646', 'step': 10275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:48.853384', 'step': 10275, 'epoch': 2} {'type': 'loss', 'content': 0.1265077143907547, 'timestamp': '2025-09-30 22:24:48.879730', 'step': 10276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:48.911502', 'step': 10276, 'epoch': 2} {'type': 'loss', 'content': 0.08844371885061264, 'timestamp': '2025-09-30 22:24:48.914214', 'step': 10277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:48.948095', 'step': 10277, 'epoch': 2} {'type': 'loss', 'content': 0.0795578584074974, 'timestamp': '2025-09-30 22:24:48.950806', 'step': 10278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:48.982015', 'step': 10278, 'epoch': 2} {'type': 'loss', 'content': 0.22472339868545532, 'timestamp': '2025-09-30 22:24:48.986484', 'step': 10279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:49.018532', 'step': 10279, 'epoch': 2} {'type': 'loss', 'content': 0.14431418478488922, 'timestamp': '2025-09-30 22:24:49.049808', 'step': 10280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.080538', 'step': 10280, 'epoch': 2} {'type': 'loss', 'content': 0.1413542628288269, 'timestamp': '2025-09-30 22:24:49.084477', 'step': 10281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.120577', 'step': 10281, 'epoch': 2} {'type': 'loss', 'content': 0.18655283749103546, 'timestamp': '2025-09-30 22:24:49.123992', 'step': 10282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:49.156061', 'step': 10282, 'epoch': 2} {'type': 'loss', 'content': 0.11132408678531647, 'timestamp': '2025-09-30 22:24:49.158776', 'step': 10283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.191583', 'step': 10283, 'epoch': 2} {'type': 'loss', 'content': 0.07453452795743942, 'timestamp': '2025-09-30 22:24:49.216886', 'step': 10284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:49.252514', 'step': 10284, 'epoch': 2} {'type': 'loss', 'content': 0.13836179673671722, 'timestamp': '2025-09-30 22:24:49.255607', 'step': 10285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:49.287525', 'step': 10285, 'epoch': 2} {'type': 'loss', 'content': 0.10410363227128983, 'timestamp': '2025-09-30 22:24:49.304386', 'step': 10286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.345569', 'step': 10286, 'epoch': 2} {'type': 'loss', 'content': 0.11015978455543518, 'timestamp': '2025-09-30 22:24:49.348558', 'step': 10287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:49.380157', 'step': 10287, 'epoch': 2} {'type': 'loss', 'content': 0.06337863951921463, 'timestamp': '2025-09-30 22:24:49.405684', 'step': 10288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.437689', 'step': 10288, 'epoch': 2} {'type': 'loss', 'content': 0.03384391963481903, 'timestamp': '2025-09-30 22:24:49.441859', 'step': 10289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.473498', 'step': 10289, 'epoch': 2} {'type': 'loss', 'content': 0.19091343879699707, 'timestamp': '2025-09-30 22:24:49.476131', 'step': 10290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.508587', 'step': 10290, 'epoch': 2} {'type': 'loss', 'content': 0.1281520426273346, 'timestamp': '2025-09-30 22:24:49.511855', 'step': 10291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.543037', 'step': 10291, 'epoch': 2} {'type': 'loss', 'content': 0.09033464640378952, 'timestamp': '2025-09-30 22:24:49.567280', 'step': 10292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.600647', 'step': 10292, 'epoch': 2} {'type': 'loss', 'content': 0.06001332029700279, 'timestamp': '2025-09-30 22:24:49.605416', 'step': 10293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:49.638183', 'step': 10293, 'epoch': 2} {'type': 'loss', 'content': 0.06809502840042114, 'timestamp': '2025-09-30 22:24:49.645490', 'step': 10294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:49.693157', 'step': 10294, 'epoch': 2} {'type': 'loss', 'content': 0.056258246302604675, 'timestamp': '2025-09-30 22:24:49.703620', 'step': 10295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:49.743534', 'step': 10295, 'epoch': 2} {'type': 'loss', 'content': 0.12393955141305923, 'timestamp': '2025-09-30 22:24:49.774223', 'step': 10296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:49.804755', 'step': 10296, 'epoch': 2} {'type': 'loss', 'content': 0.1437520980834961, 'timestamp': '2025-09-30 22:24:49.815306', 'step': 10297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:49.850720', 'step': 10297, 'epoch': 2} {'type': 'loss', 'content': 0.15874223411083221, 'timestamp': '2025-09-30 22:24:49.854221', 'step': 10298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.887783', 'step': 10298, 'epoch': 2} {'type': 'loss', 'content': 0.09034623950719833, 'timestamp': '2025-09-30 22:24:49.892541', 'step': 10299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:49.923119', 'step': 10299, 'epoch': 2} {'type': 'loss', 'content': 0.1479908525943756, 'timestamp': '2025-09-30 22:24:49.949138', 'step': 10300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:49.980054', 'step': 10300, 'epoch': 2} {'type': 'loss', 'content': 0.19169604778289795, 'timestamp': '2025-09-30 22:24:49.983403', 'step': 10301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:50.024795', 'step': 10301, 'epoch': 2} {'type': 'loss', 'content': 0.09691089391708374, 'timestamp': '2025-09-30 22:24:50.037182', 'step': 10302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:50.085183', 'step': 10302, 'epoch': 2} {'type': 'loss', 'content': 0.0898786261677742, 'timestamp': '2025-09-30 22:24:50.094891', 'step': 10303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:50.127564', 'step': 10303, 'epoch': 2} {'type': 'loss', 'content': 0.12168143689632416, 'timestamp': '2025-09-30 22:24:50.155108', 'step': 10304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.186552', 'step': 10304, 'epoch': 2} {'type': 'loss', 'content': 0.064527727663517, 'timestamp': '2025-09-30 22:24:50.190115', 'step': 10305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.222264', 'step': 10305, 'epoch': 2} {'type': 'loss', 'content': 0.1555054783821106, 'timestamp': '2025-09-30 22:24:50.226826', 'step': 10306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.258492', 'step': 10306, 'epoch': 2} {'type': 'loss', 'content': 0.09965400397777557, 'timestamp': '2025-09-30 22:24:50.261421', 'step': 10307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:50.299900', 'step': 10307, 'epoch': 2} {'type': 'loss', 'content': 0.07670382410287857, 'timestamp': '2025-09-30 22:24:50.333348', 'step': 10308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.370138', 'step': 10308, 'epoch': 2} {'type': 'loss', 'content': 0.13006934523582458, 'timestamp': '2025-09-30 22:24:50.373945', 'step': 10309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.404460', 'step': 10309, 'epoch': 2} {'type': 'loss', 'content': 0.09952362626791, 'timestamp': '2025-09-30 22:24:50.409110', 'step': 10310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.441260', 'step': 10310, 'epoch': 2} {'type': 'loss', 'content': 0.0863911584019661, 'timestamp': '2025-09-30 22:24:50.444608', 'step': 10311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:50.484708', 'step': 10311, 'epoch': 2} {'type': 'loss', 'content': 0.08149149268865585, 'timestamp': '2025-09-30 22:24:50.524155', 'step': 10312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:50.560668', 'step': 10312, 'epoch': 2} {'type': 'loss', 'content': 0.09658809006214142, 'timestamp': '2025-09-30 22:24:50.563139', 'step': 10313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:50.601278', 'step': 10313, 'epoch': 2} {'type': 'loss', 'content': 0.10031674057245255, 'timestamp': '2025-09-30 22:24:50.605636', 'step': 10314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:50.638486', 'step': 10314, 'epoch': 2} {'type': 'loss', 'content': 0.10076577961444855, 'timestamp': '2025-09-30 22:24:50.641897', 'step': 10315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.675007', 'step': 10315, 'epoch': 2} {'type': 'loss', 'content': 0.08511725813150406, 'timestamp': '2025-09-30 22:24:50.701628', 'step': 10316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:50.732921', 'step': 10316, 'epoch': 2} {'type': 'loss', 'content': 0.15006393194198608, 'timestamp': '2025-09-30 22:24:50.739042', 'step': 10317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:50.775897', 'step': 10317, 'epoch': 2} {'type': 'loss', 'content': 0.09272468835115433, 'timestamp': '2025-09-30 22:24:50.781389', 'step': 10318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:50.815513', 'step': 10318, 'epoch': 2} {'type': 'loss', 'content': 0.16927099227905273, 'timestamp': '2025-09-30 22:24:50.819853', 'step': 10319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.860605', 'step': 10319, 'epoch': 2} {'type': 'loss', 'content': 0.1476297229528427, 'timestamp': '2025-09-30 22:24:50.886096', 'step': 10320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.918550', 'step': 10320, 'epoch': 2} {'type': 'loss', 'content': 0.10008398443460464, 'timestamp': '2025-09-30 22:24:50.926995', 'step': 10321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:50.966294', 'step': 10321, 'epoch': 2} {'type': 'loss', 'content': 0.1619354486465454, 'timestamp': '2025-09-30 22:24:50.973412', 'step': 10322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.005848', 'step': 10322, 'epoch': 2} {'type': 'loss', 'content': 0.06602291017770767, 'timestamp': '2025-09-30 22:24:51.009127', 'step': 10323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.050810', 'step': 10323, 'epoch': 2} {'type': 'loss', 'content': 0.09894909709692001, 'timestamp': '2025-09-30 22:24:51.075184', 'step': 10324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:51.108658', 'step': 10324, 'epoch': 2} {'type': 'loss', 'content': 0.09866099804639816, 'timestamp': '2025-09-30 22:24:51.111386', 'step': 10325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:51.144868', 'step': 10325, 'epoch': 2} {'type': 'loss', 'content': 0.13125622272491455, 'timestamp': '2025-09-30 22:24:51.149447', 'step': 10326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.180353', 'step': 10326, 'epoch': 2} {'type': 'loss', 'content': 0.11775843054056168, 'timestamp': '2025-09-30 22:24:51.184394', 'step': 10327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.219405', 'step': 10327, 'epoch': 2} {'type': 'loss', 'content': 0.11948222666978836, 'timestamp': '2025-09-30 22:24:51.243770', 'step': 10328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:51.277307', 'step': 10328, 'epoch': 2} {'type': 'loss', 'content': 0.1581919938325882, 'timestamp': '2025-09-30 22:24:51.282397', 'step': 10329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:51.313339', 'step': 10329, 'epoch': 2} {'type': 'loss', 'content': 0.06531219184398651, 'timestamp': '2025-09-30 22:24:51.316566', 'step': 10330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.348736', 'step': 10330, 'epoch': 2} {'type': 'loss', 'content': 0.09539322555065155, 'timestamp': '2025-09-30 22:24:51.351504', 'step': 10331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.384891', 'step': 10331, 'epoch': 2} {'type': 'loss', 'content': 0.07914324104785919, 'timestamp': '2025-09-30 22:24:51.411527', 'step': 10332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:51.444165', 'step': 10332, 'epoch': 2} {'type': 'loss', 'content': 0.15818341076374054, 'timestamp': '2025-09-30 22:24:51.448114', 'step': 10333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:51.479086', 'step': 10333, 'epoch': 2} {'type': 'loss', 'content': 0.0853596180677414, 'timestamp': '2025-09-30 22:24:51.483507', 'step': 10334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:51.515315', 'step': 10334, 'epoch': 2} {'type': 'loss', 'content': 0.08925110846757889, 'timestamp': '2025-09-30 22:24:51.518249', 'step': 10335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:51.556684', 'step': 10335, 'epoch': 2} {'type': 'loss', 'content': 0.12059628963470459, 'timestamp': '2025-09-30 22:24:51.584480', 'step': 10336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:51.616899', 'step': 10336, 'epoch': 2} {'type': 'loss', 'content': 0.18102411925792694, 'timestamp': '2025-09-30 22:24:51.619911', 'step': 10337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:51.655164', 'step': 10337, 'epoch': 2} {'type': 'loss', 'content': 0.1698487251996994, 'timestamp': '2025-09-30 22:24:51.659742', 'step': 10338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:51.693119', 'step': 10338, 'epoch': 2} {'type': 'loss', 'content': 0.09609044343233109, 'timestamp': '2025-09-30 22:24:51.701260', 'step': 10339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:51.736554', 'step': 10339, 'epoch': 2} {'type': 'loss', 'content': 0.11093645542860031, 'timestamp': '2025-09-30 22:24:51.767077', 'step': 10340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:51.803651', 'step': 10340, 'epoch': 2} {'type': 'loss', 'content': 0.11626182496547699, 'timestamp': '2025-09-30 22:24:51.807841', 'step': 10341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:51.839273', 'step': 10341, 'epoch': 2} {'type': 'loss', 'content': 0.16160725057125092, 'timestamp': '2025-09-30 22:24:51.842745', 'step': 10342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:51.873566', 'step': 10342, 'epoch': 2} {'type': 'loss', 'content': 0.10123679041862488, 'timestamp': '2025-09-30 22:24:51.880241', 'step': 10343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:51.924473', 'step': 10343, 'epoch': 2} {'type': 'loss', 'content': 0.18371817469596863, 'timestamp': '2025-09-30 22:24:51.955867', 'step': 10344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:51.989749', 'step': 10344, 'epoch': 2} {'type': 'loss', 'content': 0.06848473846912384, 'timestamp': '2025-09-30 22:24:51.992636', 'step': 10345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.032804', 'step': 10345, 'epoch': 2} {'type': 'loss', 'content': 0.05360422283411026, 'timestamp': '2025-09-30 22:24:52.036164', 'step': 10346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.068489', 'step': 10346, 'epoch': 2} {'type': 'loss', 'content': 0.12054653465747833, 'timestamp': '2025-09-30 22:24:52.071247', 'step': 10347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:52.102491', 'step': 10347, 'epoch': 2} {'type': 'loss', 'content': 0.07970580458641052, 'timestamp': '2025-09-30 22:24:52.127377', 'step': 10348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:52.160362', 'step': 10348, 'epoch': 2} {'type': 'loss', 'content': 0.13550062477588654, 'timestamp': '2025-09-30 22:24:52.166084', 'step': 10349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:52.199590', 'step': 10349, 'epoch': 2} {'type': 'loss', 'content': 0.10076422244310379, 'timestamp': '2025-09-30 22:24:52.209221', 'step': 10350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:52.241621', 'step': 10350, 'epoch': 2} {'type': 'loss', 'content': 0.07619337737560272, 'timestamp': '2025-09-30 22:24:52.244648', 'step': 10351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:52.276127', 'step': 10351, 'epoch': 2} {'type': 'loss', 'content': 0.08142322301864624, 'timestamp': '2025-09-30 22:24:52.300016', 'step': 10352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:52.332104', 'step': 10352, 'epoch': 2} {'type': 'loss', 'content': 0.07171844691038132, 'timestamp': '2025-09-30 22:24:52.336739', 'step': 10353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.375640', 'step': 10353, 'epoch': 2} {'type': 'loss', 'content': 0.10718914121389389, 'timestamp': '2025-09-30 22:24:52.379445', 'step': 10354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.411507', 'step': 10354, 'epoch': 2} {'type': 'loss', 'content': 0.09202319383621216, 'timestamp': '2025-09-30 22:24:52.414683', 'step': 10355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:24:52.449679', 'step': 10355, 'epoch': 2} {'type': 'loss', 'content': 0.18335436284542084, 'timestamp': '2025-09-30 22:24:52.475312', 'step': 10356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.509775', 'step': 10356, 'epoch': 2} {'type': 'loss', 'content': 0.063361257314682, 'timestamp': '2025-09-30 22:24:52.512831', 'step': 10357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.545858', 'step': 10357, 'epoch': 2} {'type': 'loss', 'content': 0.11455775797367096, 'timestamp': '2025-09-30 22:24:52.548459', 'step': 10358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.584805', 'step': 10358, 'epoch': 2} {'type': 'loss', 'content': 0.21580661833286285, 'timestamp': '2025-09-30 22:24:52.592524', 'step': 10359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:52.628064', 'step': 10359, 'epoch': 2} {'type': 'loss', 'content': 0.09893391281366348, 'timestamp': '2025-09-30 22:24:52.652588', 'step': 10360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.683748', 'step': 10360, 'epoch': 2} {'type': 'loss', 'content': 0.14344246685504913, 'timestamp': '2025-09-30 22:24:52.689365', 'step': 10361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.723168', 'step': 10361, 'epoch': 2} {'type': 'loss', 'content': 0.06948328018188477, 'timestamp': '2025-09-30 22:24:52.727017', 'step': 10362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.765455', 'step': 10362, 'epoch': 2} {'type': 'loss', 'content': 0.1382133513689041, 'timestamp': '2025-09-30 22:24:52.776798', 'step': 10363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:52.807426', 'step': 10363, 'epoch': 2} {'type': 'loss', 'content': 0.039590515196323395, 'timestamp': '2025-09-30 22:24:52.831735', 'step': 10364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:52.867367', 'step': 10364, 'epoch': 2} {'type': 'loss', 'content': 0.1065848171710968, 'timestamp': '2025-09-30 22:24:52.870042', 'step': 10365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:52.901727', 'step': 10365, 'epoch': 2} {'type': 'loss', 'content': 0.16443532705307007, 'timestamp': '2025-09-30 22:24:52.904059', 'step': 10366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:52.934115', 'step': 10366, 'epoch': 2} {'type': 'loss', 'content': 0.07508033514022827, 'timestamp': '2025-09-30 22:24:52.940658', 'step': 10367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:52.973555', 'step': 10367, 'epoch': 2} {'type': 'loss', 'content': 0.10318699479103088, 'timestamp': '2025-09-30 22:24:53.008209', 'step': 10368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:53.045322', 'step': 10368, 'epoch': 2} {'type': 'loss', 'content': 0.0920708030462265, 'timestamp': '2025-09-30 22:24:53.048949', 'step': 10369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:53.089422', 'step': 10369, 'epoch': 2} {'type': 'loss', 'content': 0.08764142543077469, 'timestamp': '2025-09-30 22:24:53.093985', 'step': 10370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:53.125894', 'step': 10370, 'epoch': 2} {'type': 'loss', 'content': 0.10608050972223282, 'timestamp': '2025-09-30 22:24:53.129721', 'step': 10371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.165611', 'step': 10371, 'epoch': 2} {'type': 'loss', 'content': 0.14004941284656525, 'timestamp': '2025-09-30 22:24:53.194579', 'step': 10372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:53.233281', 'step': 10372, 'epoch': 2} {'type': 'loss', 'content': 0.09441135823726654, 'timestamp': '2025-09-30 22:24:53.237342', 'step': 10373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:53.273879', 'step': 10373, 'epoch': 2} {'type': 'loss', 'content': 0.10915913432836533, 'timestamp': '2025-09-30 22:24:53.278958', 'step': 10374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:53.312840', 'step': 10374, 'epoch': 2} {'type': 'loss', 'content': 0.1313924491405487, 'timestamp': '2025-09-30 22:24:53.318982', 'step': 10375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.360009', 'step': 10375, 'epoch': 2} {'type': 'loss', 'content': 0.11811314523220062, 'timestamp': '2025-09-30 22:24:53.387121', 'step': 10376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:53.428440', 'step': 10376, 'epoch': 2} {'type': 'loss', 'content': 0.07277965545654297, 'timestamp': '2025-09-30 22:24:53.431860', 'step': 10377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.462311', 'step': 10377, 'epoch': 2} {'type': 'loss', 'content': 0.11590415239334106, 'timestamp': '2025-09-30 22:24:53.468384', 'step': 10378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:53.500638', 'step': 10378, 'epoch': 2} {'type': 'loss', 'content': 0.1273895651102066, 'timestamp': '2025-09-30 22:24:53.505415', 'step': 10379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:53.545301', 'step': 10379, 'epoch': 2} {'type': 'loss', 'content': 0.18222247064113617, 'timestamp': '2025-09-30 22:24:53.574607', 'step': 10380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.618702', 'step': 10380, 'epoch': 2} {'type': 'loss', 'content': 0.16816942393779755, 'timestamp': '2025-09-30 22:24:53.622876', 'step': 10381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.655842', 'step': 10381, 'epoch': 2} {'type': 'loss', 'content': 0.08414202928543091, 'timestamp': '2025-09-30 22:24:53.661096', 'step': 10382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:53.696123', 'step': 10382, 'epoch': 2} {'type': 'loss', 'content': 0.08255227655172348, 'timestamp': '2025-09-30 22:24:53.700803', 'step': 10383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:53.741409', 'step': 10383, 'epoch': 2} {'type': 'loss', 'content': 0.14064189791679382, 'timestamp': '2025-09-30 22:24:53.767155', 'step': 10384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:53.810584', 'step': 10384, 'epoch': 2} {'type': 'loss', 'content': 0.18078729510307312, 'timestamp': '2025-09-30 22:24:53.815256', 'step': 10385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:53.848706', 'step': 10385, 'epoch': 2} {'type': 'loss', 'content': 0.1206710934638977, 'timestamp': '2025-09-30 22:24:53.852416', 'step': 10386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.887492', 'step': 10386, 'epoch': 2} {'type': 'loss', 'content': 0.05023615434765816, 'timestamp': '2025-09-30 22:24:53.892731', 'step': 10387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:53.925480', 'step': 10387, 'epoch': 2} {'type': 'loss', 'content': 0.07359840720891953, 'timestamp': '2025-09-30 22:24:53.953642', 'step': 10388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:53.985337', 'step': 10388, 'epoch': 2} {'type': 'loss', 'content': 0.17020004987716675, 'timestamp': '2025-09-30 22:24:53.989450', 'step': 10389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:54.021328', 'step': 10389, 'epoch': 2} {'type': 'loss', 'content': 0.09476107358932495, 'timestamp': '2025-09-30 22:24:54.030566', 'step': 10390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.062325', 'step': 10390, 'epoch': 2} {'type': 'loss', 'content': 0.08980421721935272, 'timestamp': '2025-09-30 22:24:54.069949', 'step': 10391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:54.102831', 'step': 10391, 'epoch': 2} {'type': 'loss', 'content': 0.12846744060516357, 'timestamp': '2025-09-30 22:24:54.128754', 'step': 10392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.164666', 'step': 10392, 'epoch': 2} {'type': 'loss', 'content': 0.14463277161121368, 'timestamp': '2025-09-30 22:24:54.169489', 'step': 10393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.203123', 'step': 10393, 'epoch': 2} {'type': 'loss', 'content': 0.18105152249336243, 'timestamp': '2025-09-30 22:24:54.206633', 'step': 10394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.241736', 'step': 10394, 'epoch': 2} {'type': 'loss', 'content': 0.07885684072971344, 'timestamp': '2025-09-30 22:24:54.244716', 'step': 10395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:24:54.277849', 'step': 10395, 'epoch': 2} {'type': 'loss', 'content': 0.1672441065311432, 'timestamp': '2025-09-30 22:24:54.303580', 'step': 10396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:54.336426', 'step': 10396, 'epoch': 2} {'type': 'loss', 'content': 0.1809293031692505, 'timestamp': '2025-09-30 22:24:54.344680', 'step': 10397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:54.379189', 'step': 10397, 'epoch': 2} {'type': 'loss', 'content': 0.18380695581436157, 'timestamp': '2025-09-30 22:24:54.384541', 'step': 10398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.416650', 'step': 10398, 'epoch': 2} {'type': 'loss', 'content': 0.13184715807437897, 'timestamp': '2025-09-30 22:24:54.427248', 'step': 10399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:54.462097', 'step': 10399, 'epoch': 2} {'type': 'loss', 'content': 0.1534658819437027, 'timestamp': '2025-09-30 22:24:54.488335', 'step': 10400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.520101', 'step': 10400, 'epoch': 2} {'type': 'loss', 'content': 0.1195787563920021, 'timestamp': '2025-09-30 22:24:54.523389', 'step': 10401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.554769', 'step': 10401, 'epoch': 2} {'type': 'loss', 'content': 0.08296404033899307, 'timestamp': '2025-09-30 22:24:54.558386', 'step': 10402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.588781', 'step': 10402, 'epoch': 2} {'type': 'loss', 'content': 0.09853257983922958, 'timestamp': '2025-09-30 22:24:54.593405', 'step': 10403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:54.636108', 'step': 10403, 'epoch': 2} {'type': 'loss', 'content': 0.06612975895404816, 'timestamp': '2025-09-30 22:24:54.666979', 'step': 10404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.697856', 'step': 10404, 'epoch': 2} {'type': 'loss', 'content': 0.10821721702814102, 'timestamp': '2025-09-30 22:24:54.703165', 'step': 10405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:54.733656', 'step': 10405, 'epoch': 2} {'type': 'loss', 'content': 0.12961825728416443, 'timestamp': '2025-09-30 22:24:54.738178', 'step': 10406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:54.771751', 'step': 10406, 'epoch': 2} {'type': 'loss', 'content': 0.0953473150730133, 'timestamp': '2025-09-30 22:24:54.776516', 'step': 10407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.818172', 'step': 10407, 'epoch': 2} {'type': 'loss', 'content': 0.09258110076189041, 'timestamp': '2025-09-30 22:24:54.844594', 'step': 10408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:54.875539', 'step': 10408, 'epoch': 2} {'type': 'loss', 'content': 0.06952173262834549, 'timestamp': '2025-09-30 22:24:54.878620', 'step': 10409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.910394', 'step': 10409, 'epoch': 2} {'type': 'loss', 'content': 0.075571209192276, 'timestamp': '2025-09-30 22:24:54.914116', 'step': 10410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:54.947273', 'step': 10410, 'epoch': 2} {'type': 'loss', 'content': 0.11485859006643295, 'timestamp': '2025-09-30 22:24:54.950232', 'step': 10411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:54.995562', 'step': 10411, 'epoch': 2} {'type': 'loss', 'content': 0.20749998092651367, 'timestamp': '2025-09-30 22:24:55.024257', 'step': 10412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.055327', 'step': 10412, 'epoch': 2} {'type': 'loss', 'content': 0.1169983372092247, 'timestamp': '2025-09-30 22:24:55.058739', 'step': 10413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.090531', 'step': 10413, 'epoch': 2} {'type': 'loss', 'content': 0.10880119353532791, 'timestamp': '2025-09-30 22:24:55.094594', 'step': 10414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:55.128660', 'step': 10414, 'epoch': 2} {'type': 'loss', 'content': 0.08967253565788269, 'timestamp': '2025-09-30 22:24:55.131677', 'step': 10415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.166155', 'step': 10415, 'epoch': 2} {'type': 'loss', 'content': 0.07907058298587799, 'timestamp': '2025-09-30 22:24:55.191274', 'step': 10416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.224490', 'step': 10416, 'epoch': 2} {'type': 'loss', 'content': 0.1365543007850647, 'timestamp': '2025-09-30 22:24:55.230487', 'step': 10417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:55.266047', 'step': 10417, 'epoch': 2} {'type': 'loss', 'content': 0.19875375926494598, 'timestamp': '2025-09-30 22:24:55.268961', 'step': 10418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.300904', 'step': 10418, 'epoch': 2} {'type': 'loss', 'content': 0.12332028150558472, 'timestamp': '2025-09-30 22:24:55.306337', 'step': 10419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.341962', 'step': 10419, 'epoch': 2} {'type': 'loss', 'content': 0.1724303960800171, 'timestamp': '2025-09-30 22:24:55.367858', 'step': 10420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.404090', 'step': 10420, 'epoch': 2} {'type': 'loss', 'content': 0.11249848455190659, 'timestamp': '2025-09-30 22:24:55.413049', 'step': 10421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.446793', 'step': 10421, 'epoch': 2} {'type': 'loss', 'content': 0.135567307472229, 'timestamp': '2025-09-30 22:24:55.449652', 'step': 10422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:55.480407', 'step': 10422, 'epoch': 2} {'type': 'loss', 'content': 0.13581041991710663, 'timestamp': '2025-09-30 22:24:55.484226', 'step': 10423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.515487', 'step': 10423, 'epoch': 2} {'type': 'loss', 'content': 0.12376466393470764, 'timestamp': '2025-09-30 22:24:55.541504', 'step': 10424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.572322', 'step': 10424, 'epoch': 2} {'type': 'loss', 'content': 0.158431738615036, 'timestamp': '2025-09-30 22:24:55.576231', 'step': 10425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:55.606480', 'step': 10425, 'epoch': 2} {'type': 'loss', 'content': 0.15404069423675537, 'timestamp': '2025-09-30 22:24:55.611724', 'step': 10426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.642470', 'step': 10426, 'epoch': 2} {'type': 'loss', 'content': 0.10870365798473358, 'timestamp': '2025-09-30 22:24:55.646505', 'step': 10427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.677183', 'step': 10427, 'epoch': 2} {'type': 'loss', 'content': 0.05960850045084953, 'timestamp': '2025-09-30 22:24:55.707743', 'step': 10428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.741269', 'step': 10428, 'epoch': 2} {'type': 'loss', 'content': 0.10578587651252747, 'timestamp': '2025-09-30 22:24:55.744352', 'step': 10429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.775273', 'step': 10429, 'epoch': 2} {'type': 'loss', 'content': 0.11414123326539993, 'timestamp': '2025-09-30 22:24:55.778120', 'step': 10430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:55.814355', 'step': 10430, 'epoch': 2} {'type': 'loss', 'content': 0.17781692743301392, 'timestamp': '2025-09-30 22:24:55.818556', 'step': 10431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.849886', 'step': 10431, 'epoch': 2} {'type': 'loss', 'content': 0.14872236549854279, 'timestamp': '2025-09-30 22:24:55.874776', 'step': 10432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:55.906190', 'step': 10432, 'epoch': 2} {'type': 'loss', 'content': 0.13499119877815247, 'timestamp': '2025-09-30 22:24:55.909874', 'step': 10433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:55.942870', 'step': 10433, 'epoch': 2} {'type': 'loss', 'content': 0.0658353790640831, 'timestamp': '2025-09-30 22:24:55.946396', 'step': 10434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:55.977230', 'step': 10434, 'epoch': 2} {'type': 'loss', 'content': 0.10490654408931732, 'timestamp': '2025-09-30 22:24:55.989105', 'step': 10435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:56.019837', 'step': 10435, 'epoch': 2} {'type': 'loss', 'content': 0.06025215610861778, 'timestamp': '2025-09-30 22:24:56.044966', 'step': 10436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:24:56.077326', 'step': 10436, 'epoch': 2} {'type': 'loss', 'content': 0.13605539500713348, 'timestamp': '2025-09-30 22:24:56.081817', 'step': 10437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:56.112375', 'step': 10437, 'epoch': 2} {'type': 'loss', 'content': 0.07363390922546387, 'timestamp': '2025-09-30 22:24:56.115269', 'step': 10438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.146852', 'step': 10438, 'epoch': 2} {'type': 'loss', 'content': 0.04566522687673569, 'timestamp': '2025-09-30 22:24:56.153052', 'step': 10439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:56.189846', 'step': 10439, 'epoch': 2} {'type': 'loss', 'content': 0.15639784932136536, 'timestamp': '2025-09-30 22:24:56.215107', 'step': 10440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:56.247341', 'step': 10440, 'epoch': 2} {'type': 'loss', 'content': 0.07682246714830399, 'timestamp': '2025-09-30 22:24:56.250155', 'step': 10441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:56.281690', 'step': 10441, 'epoch': 2} {'type': 'loss', 'content': 0.09623464941978455, 'timestamp': '2025-09-30 22:24:56.284159', 'step': 10442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.317751', 'step': 10442, 'epoch': 2} {'type': 'loss', 'content': 0.08207008987665176, 'timestamp': '2025-09-30 22:24:56.321131', 'step': 10443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:56.356642', 'step': 10443, 'epoch': 2} {'type': 'loss', 'content': 0.10836821794509888, 'timestamp': '2025-09-30 22:24:56.385148', 'step': 10444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.418075', 'step': 10444, 'epoch': 2} {'type': 'loss', 'content': 0.07772000879049301, 'timestamp': '2025-09-30 22:24:56.420284', 'step': 10445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:56.451487', 'step': 10445, 'epoch': 2} {'type': 'loss', 'content': 0.08532244712114334, 'timestamp': '2025-09-30 22:24:56.457335', 'step': 10446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:56.489812', 'step': 10446, 'epoch': 2} {'type': 'loss', 'content': 0.05643313750624657, 'timestamp': '2025-09-30 22:24:56.494346', 'step': 10447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:56.527029', 'step': 10447, 'epoch': 2} {'type': 'loss', 'content': 0.13183905184268951, 'timestamp': '2025-09-30 22:24:56.552528', 'step': 10448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:56.584902', 'step': 10448, 'epoch': 2} {'type': 'loss', 'content': 0.17919154465198517, 'timestamp': '2025-09-30 22:24:56.588242', 'step': 10449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:56.624478', 'step': 10449, 'epoch': 2} {'type': 'loss', 'content': 0.11329180747270584, 'timestamp': '2025-09-30 22:24:56.626852', 'step': 10450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:56.659322', 'step': 10450, 'epoch': 2} {'type': 'loss', 'content': 0.09749524295330048, 'timestamp': '2025-09-30 22:24:56.673519', 'step': 10451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.707659', 'step': 10451, 'epoch': 2} {'type': 'loss', 'content': 0.17246389389038086, 'timestamp': '2025-09-30 22:24:56.734203', 'step': 10452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:56.765788', 'step': 10452, 'epoch': 2} {'type': 'loss', 'content': 0.15973085165023804, 'timestamp': '2025-09-30 22:24:56.769077', 'step': 10453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.801677', 'step': 10453, 'epoch': 2} {'type': 'loss', 'content': 0.1352466195821762, 'timestamp': '2025-09-30 22:24:56.804799', 'step': 10454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.837008', 'step': 10454, 'epoch': 2} {'type': 'loss', 'content': 0.13538745045661926, 'timestamp': '2025-09-30 22:24:56.841243', 'step': 10455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:56.886076', 'step': 10455, 'epoch': 2} {'type': 'loss', 'content': 0.09013564139604568, 'timestamp': '2025-09-30 22:24:56.911041', 'step': 10456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:56.943694', 'step': 10456, 'epoch': 2} {'type': 'loss', 'content': 0.09899494796991348, 'timestamp': '2025-09-30 22:24:56.946725', 'step': 10457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:56.984844', 'step': 10457, 'epoch': 2} {'type': 'loss', 'content': 0.10325060039758682, 'timestamp': '2025-09-30 22:24:56.989052', 'step': 10458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:24:57.028359', 'step': 10458, 'epoch': 2} {'type': 'loss', 'content': 0.10474991053342819, 'timestamp': '2025-09-30 22:24:57.032752', 'step': 10459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:57.068014', 'step': 10459, 'epoch': 2} {'type': 'loss', 'content': 0.10059979557991028, 'timestamp': '2025-09-30 22:24:57.092933', 'step': 10460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.126984', 'step': 10460, 'epoch': 2} {'type': 'loss', 'content': 0.06598269939422607, 'timestamp': '2025-09-30 22:24:57.130985', 'step': 10461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.164379', 'step': 10461, 'epoch': 2} {'type': 'loss', 'content': 0.12964841723442078, 'timestamp': '2025-09-30 22:24:57.168197', 'step': 10462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.205806', 'step': 10462, 'epoch': 2} {'type': 'loss', 'content': 0.14184893667697906, 'timestamp': '2025-09-30 22:24:57.212365', 'step': 10463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.243719', 'step': 10463, 'epoch': 2} {'type': 'loss', 'content': 0.0666828528046608, 'timestamp': '2025-09-30 22:24:57.268430', 'step': 10464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:57.304274', 'step': 10464, 'epoch': 2} {'type': 'loss', 'content': 0.03201710432767868, 'timestamp': '2025-09-30 22:24:57.308018', 'step': 10465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.339723', 'step': 10465, 'epoch': 2} {'type': 'loss', 'content': 0.1259164959192276, 'timestamp': '2025-09-30 22:24:57.347848', 'step': 10466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:57.385674', 'step': 10466, 'epoch': 2} {'type': 'loss', 'content': 0.11790212988853455, 'timestamp': '2025-09-30 22:24:57.392096', 'step': 10467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.427789', 'step': 10467, 'epoch': 2} {'type': 'loss', 'content': 0.03801661729812622, 'timestamp': '2025-09-30 22:24:57.453956', 'step': 10468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:57.486305', 'step': 10468, 'epoch': 2} {'type': 'loss', 'content': 0.11936808377504349, 'timestamp': '2025-09-30 22:24:57.489019', 'step': 10469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:24:57.520382', 'step': 10469, 'epoch': 2} {'type': 'loss', 'content': 0.21147306263446808, 'timestamp': '2025-09-30 22:24:57.523697', 'step': 10470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:57.559923', 'step': 10470, 'epoch': 2} {'type': 'loss', 'content': 0.07050498574972153, 'timestamp': '2025-09-30 22:24:57.564219', 'step': 10471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.601364', 'step': 10471, 'epoch': 2} {'type': 'loss', 'content': 0.03739007189869881, 'timestamp': '2025-09-30 22:24:57.634388', 'step': 10472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.669184', 'step': 10472, 'epoch': 2} {'type': 'loss', 'content': 0.10684581100940704, 'timestamp': '2025-09-30 22:24:57.672492', 'step': 10473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:57.705856', 'step': 10473, 'epoch': 2} {'type': 'loss', 'content': 0.0640762448310852, 'timestamp': '2025-09-30 22:24:57.710329', 'step': 10474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.742439', 'step': 10474, 'epoch': 2} {'type': 'loss', 'content': 0.05345924571156502, 'timestamp': '2025-09-30 22:24:57.749254', 'step': 10475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.787409', 'step': 10475, 'epoch': 2} {'type': 'loss', 'content': 0.14261355996131897, 'timestamp': '2025-09-30 22:24:57.813674', 'step': 10476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:57.847122', 'step': 10476, 'epoch': 2} {'type': 'loss', 'content': 0.11403239518404007, 'timestamp': '2025-09-30 22:24:57.861888', 'step': 10477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:57.893651', 'step': 10477, 'epoch': 2} {'type': 'loss', 'content': 0.18281400203704834, 'timestamp': '2025-09-30 22:24:57.901147', 'step': 10478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:57.933063', 'step': 10478, 'epoch': 2} {'type': 'loss', 'content': 0.05386469140648842, 'timestamp': '2025-09-30 22:24:57.938251', 'step': 10479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:57.971193', 'step': 10479, 'epoch': 2} {'type': 'loss', 'content': 0.11099633574485779, 'timestamp': '2025-09-30 22:24:57.996979', 'step': 10480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.028614', 'step': 10480, 'epoch': 2} {'type': 'loss', 'content': 0.1025872752070427, 'timestamp': '2025-09-30 22:24:58.037121', 'step': 10481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:58.075529', 'step': 10481, 'epoch': 2} {'type': 'loss', 'content': 0.13353703916072845, 'timestamp': '2025-09-30 22:24:58.078747', 'step': 10482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.111080', 'step': 10482, 'epoch': 2} {'type': 'loss', 'content': 0.12636011838912964, 'timestamp': '2025-09-30 22:24:58.120260', 'step': 10483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:58.151328', 'step': 10483, 'epoch': 2} {'type': 'loss', 'content': 0.08306021988391876, 'timestamp': '2025-09-30 22:24:58.176452', 'step': 10484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:58.210735', 'step': 10484, 'epoch': 2} {'type': 'loss', 'content': 0.11972284317016602, 'timestamp': '2025-09-30 22:24:58.217821', 'step': 10485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.252176', 'step': 10485, 'epoch': 2} {'type': 'loss', 'content': 0.03033275716006756, 'timestamp': '2025-09-30 22:24:58.261940', 'step': 10486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:58.296062', 'step': 10486, 'epoch': 2} {'type': 'loss', 'content': 0.2262340486049652, 'timestamp': '2025-09-30 22:24:58.300023', 'step': 10487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:58.332085', 'step': 10487, 'epoch': 2} {'type': 'loss', 'content': 0.1400100588798523, 'timestamp': '2025-09-30 22:24:58.356357', 'step': 10488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.386639', 'step': 10488, 'epoch': 2} {'type': 'loss', 'content': 0.06724240630865097, 'timestamp': '2025-09-30 22:24:58.389849', 'step': 10489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.420113', 'step': 10489, 'epoch': 2} {'type': 'loss', 'content': 0.1278734803199768, 'timestamp': '2025-09-30 22:24:58.424112', 'step': 10490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.457385', 'step': 10490, 'epoch': 2} {'type': 'loss', 'content': 0.12566503882408142, 'timestamp': '2025-09-30 22:24:58.467642', 'step': 10491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.500000', 'step': 10491, 'epoch': 2} {'type': 'loss', 'content': 0.11415567249059677, 'timestamp': '2025-09-30 22:24:58.528635', 'step': 10492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:24:58.562264', 'step': 10492, 'epoch': 2} {'type': 'loss', 'content': 0.1386546790599823, 'timestamp': '2025-09-30 22:24:58.576383', 'step': 10493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.615964', 'step': 10493, 'epoch': 2} {'type': 'loss', 'content': 0.1204281747341156, 'timestamp': '2025-09-30 22:24:58.619131', 'step': 10494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:24:58.650994', 'step': 10494, 'epoch': 2} {'type': 'loss', 'content': 0.09198091924190521, 'timestamp': '2025-09-30 22:24:58.655916', 'step': 10495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:58.687350', 'step': 10495, 'epoch': 2} {'type': 'loss', 'content': 0.15782172977924347, 'timestamp': '2025-09-30 22:24:58.720640', 'step': 10496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:58.752451', 'step': 10496, 'epoch': 2} {'type': 'loss', 'content': 0.08147416263818741, 'timestamp': '2025-09-30 22:24:58.756422', 'step': 10497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:24:58.799749', 'step': 10497, 'epoch': 2} {'type': 'loss', 'content': 0.1629302203655243, 'timestamp': '2025-09-30 22:24:58.802698', 'step': 10498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:24:58.844429', 'step': 10498, 'epoch': 2} {'type': 'loss', 'content': 0.06229190155863762, 'timestamp': '2025-09-30 22:24:58.852282', 'step': 10499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:24:58.887426', 'step': 10499, 'epoch': 2} {'type': 'loss', 'content': 0.16345788538455963, 'timestamp': '2025-09-30 22:24:58.917852', 'step': 10500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 10500', 'timestamp': '2025-09-30 22:25:03.922078', 'step': 10500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:03.977822', 'step': 10500, 'epoch': 2} {'type': 'loss', 'content': 0.07384100556373596, 'timestamp': '2025-09-30 22:25:03.982710', 'step': 10501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.016938', 'step': 10501, 'epoch': 2} {'type': 'loss', 'content': 0.08345287293195724, 'timestamp': '2025-09-30 22:25:04.024500', 'step': 10502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.071471', 'step': 10502, 'epoch': 2} {'type': 'loss', 'content': 0.1284717619419098, 'timestamp': '2025-09-30 22:25:04.083663', 'step': 10503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.127287', 'step': 10503, 'epoch': 2} {'type': 'loss', 'content': 0.1492050439119339, 'timestamp': '2025-09-30 22:25:04.152549', 'step': 10504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:04.187962', 'step': 10504, 'epoch': 2} {'type': 'loss', 'content': 0.23016048967838287, 'timestamp': '2025-09-30 22:25:04.191619', 'step': 10505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.223402', 'step': 10505, 'epoch': 2} {'type': 'loss', 'content': 0.07671802490949631, 'timestamp': '2025-09-30 22:25:04.227127', 'step': 10506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:04.268958', 'step': 10506, 'epoch': 2} {'type': 'loss', 'content': 0.17769236862659454, 'timestamp': '2025-09-30 22:25:04.272395', 'step': 10507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:04.303989', 'step': 10507, 'epoch': 2} {'type': 'loss', 'content': 0.07495659589767456, 'timestamp': '2025-09-30 22:25:04.328624', 'step': 10508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:04.359631', 'step': 10508, 'epoch': 2} {'type': 'loss', 'content': 0.09173101931810379, 'timestamp': '2025-09-30 22:25:04.362247', 'step': 10509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.407506', 'step': 10509, 'epoch': 2} {'type': 'loss', 'content': 0.053735505789518356, 'timestamp': '2025-09-30 22:25:04.418450', 'step': 10510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.454515', 'step': 10510, 'epoch': 2} {'type': 'loss', 'content': 0.14924676716327667, 'timestamp': '2025-09-30 22:25:04.459029', 'step': 10511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.491131', 'step': 10511, 'epoch': 2} {'type': 'loss', 'content': 0.0966641828417778, 'timestamp': '2025-09-30 22:25:04.516137', 'step': 10512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.547806', 'step': 10512, 'epoch': 2} {'type': 'loss', 'content': 0.12693113088607788, 'timestamp': '2025-09-30 22:25:04.552899', 'step': 10513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:04.597992', 'step': 10513, 'epoch': 2} {'type': 'loss', 'content': 0.0977470651268959, 'timestamp': '2025-09-30 22:25:04.602239', 'step': 10514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.634985', 'step': 10514, 'epoch': 2} {'type': 'loss', 'content': 0.08012595027685165, 'timestamp': '2025-09-30 22:25:04.639121', 'step': 10515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.674902', 'step': 10515, 'epoch': 2} {'type': 'loss', 'content': 0.12837375700473785, 'timestamp': '2025-09-30 22:25:04.700864', 'step': 10516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.736398', 'step': 10516, 'epoch': 2} {'type': 'loss', 'content': 0.04884570464491844, 'timestamp': '2025-09-30 22:25:04.740624', 'step': 10517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.770910', 'step': 10517, 'epoch': 2} {'type': 'loss', 'content': 0.17814958095550537, 'timestamp': '2025-09-30 22:25:04.773900', 'step': 10518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:04.808027', 'step': 10518, 'epoch': 2} {'type': 'loss', 'content': 0.09540809690952301, 'timestamp': '2025-09-30 22:25:04.811964', 'step': 10519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.841796', 'step': 10519, 'epoch': 2} {'type': 'loss', 'content': 0.17368647456169128, 'timestamp': '2025-09-30 22:25:04.866846', 'step': 10520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:04.902468', 'step': 10520, 'epoch': 2} {'type': 'loss', 'content': 0.01259652990847826, 'timestamp': '2025-09-30 22:25:04.907051', 'step': 10521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:04.939161', 'step': 10521, 'epoch': 2} {'type': 'loss', 'content': 0.06673122197389603, 'timestamp': '2025-09-30 22:25:04.944005', 'step': 10522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:04.975623', 'step': 10522, 'epoch': 2} {'type': 'loss', 'content': 0.22294436395168304, 'timestamp': '2025-09-30 22:25:04.980114', 'step': 10523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.033521', 'step': 10523, 'epoch': 2} {'type': 'loss', 'content': 0.11847078800201416, 'timestamp': '2025-09-30 22:25:05.062923', 'step': 10524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.097098', 'step': 10524, 'epoch': 2} {'type': 'loss', 'content': 0.085452601313591, 'timestamp': '2025-09-30 22:25:05.102337', 'step': 10525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.139216', 'step': 10525, 'epoch': 2} {'type': 'loss', 'content': 0.06264621764421463, 'timestamp': '2025-09-30 22:25:05.149616', 'step': 10526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:05.194263', 'step': 10526, 'epoch': 2} {'type': 'loss', 'content': 0.07488815486431122, 'timestamp': '2025-09-30 22:25:05.197058', 'step': 10527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.227484', 'step': 10527, 'epoch': 2} {'type': 'loss', 'content': 0.07082189619541168, 'timestamp': '2025-09-30 22:25:05.253403', 'step': 10528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.286290', 'step': 10528, 'epoch': 2} {'type': 'loss', 'content': 0.1966264247894287, 'timestamp': '2025-09-30 22:25:05.290444', 'step': 10529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.331202', 'step': 10529, 'epoch': 2} {'type': 'loss', 'content': 0.12928159534931183, 'timestamp': '2025-09-30 22:25:05.336682', 'step': 10530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.368446', 'step': 10530, 'epoch': 2} {'type': 'loss', 'content': 0.1490754783153534, 'timestamp': '2025-09-30 22:25:05.373453', 'step': 10531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.406802', 'step': 10531, 'epoch': 2} {'type': 'loss', 'content': 0.1085561066865921, 'timestamp': '2025-09-30 22:25:05.434757', 'step': 10532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.465588', 'step': 10532, 'epoch': 2} {'type': 'loss', 'content': 0.14391304552555084, 'timestamp': '2025-09-30 22:25:05.471381', 'step': 10533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.504018', 'step': 10533, 'epoch': 2} {'type': 'loss', 'content': 0.09516412019729614, 'timestamp': '2025-09-30 22:25:05.512789', 'step': 10534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:05.547288', 'step': 10534, 'epoch': 2} {'type': 'loss', 'content': 0.06819050014019012, 'timestamp': '2025-09-30 22:25:05.550776', 'step': 10535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.584974', 'step': 10535, 'epoch': 2} {'type': 'loss', 'content': 0.05264905095100403, 'timestamp': '2025-09-30 22:25:05.609905', 'step': 10536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:05.640262', 'step': 10536, 'epoch': 2} {'type': 'loss', 'content': 0.12969444692134857, 'timestamp': '2025-09-30 22:25:05.644962', 'step': 10537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.678190', 'step': 10537, 'epoch': 2} {'type': 'loss', 'content': 0.07732626050710678, 'timestamp': '2025-09-30 22:25:05.680844', 'step': 10538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.714481', 'step': 10538, 'epoch': 2} {'type': 'loss', 'content': 0.09851084649562836, 'timestamp': '2025-09-30 22:25:05.717582', 'step': 10539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.755039', 'step': 10539, 'epoch': 2} {'type': 'loss', 'content': 0.23670515418052673, 'timestamp': '2025-09-30 22:25:05.782500', 'step': 10540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.814266', 'step': 10540, 'epoch': 2} {'type': 'loss', 'content': 0.1600065380334854, 'timestamp': '2025-09-30 22:25:05.820117', 'step': 10541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.853942', 'step': 10541, 'epoch': 2} {'type': 'loss', 'content': 0.0956546738743782, 'timestamp': '2025-09-30 22:25:05.871764', 'step': 10542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:05.910992', 'step': 10542, 'epoch': 2} {'type': 'loss', 'content': 0.1825716197490692, 'timestamp': '2025-09-30 22:25:05.916038', 'step': 10543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:05.953887', 'step': 10543, 'epoch': 2} {'type': 'loss', 'content': 0.08056392520666122, 'timestamp': '2025-09-30 22:25:05.980324', 'step': 10544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:06.011169', 'step': 10544, 'epoch': 2} {'type': 'loss', 'content': 0.05269552022218704, 'timestamp': '2025-09-30 22:25:06.024674', 'step': 10545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:06.059901', 'step': 10545, 'epoch': 2} {'type': 'loss', 'content': 0.1020921990275383, 'timestamp': '2025-09-30 22:25:06.063951', 'step': 10546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:06.102336', 'step': 10546, 'epoch': 2} {'type': 'loss', 'content': 0.09924619644880295, 'timestamp': '2025-09-30 22:25:06.124007', 'step': 10547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:06.154527', 'step': 10547, 'epoch': 2} {'type': 'loss', 'content': 0.15685799717903137, 'timestamp': '2025-09-30 22:25:06.180091', 'step': 10548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.217054', 'step': 10548, 'epoch': 2} {'type': 'loss', 'content': 0.16765189170837402, 'timestamp': '2025-09-30 22:25:06.222657', 'step': 10549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:06.260092', 'step': 10549, 'epoch': 2} {'type': 'loss', 'content': 0.04925026372075081, 'timestamp': '2025-09-30 22:25:06.270670', 'step': 10550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:06.306416', 'step': 10550, 'epoch': 2} {'type': 'loss', 'content': 0.17284570634365082, 'timestamp': '2025-09-30 22:25:06.310619', 'step': 10551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:06.345962', 'step': 10551, 'epoch': 2} {'type': 'loss', 'content': 0.12126905471086502, 'timestamp': '2025-09-30 22:25:06.371637', 'step': 10552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.400995', 'step': 10552, 'epoch': 2} {'type': 'loss', 'content': 0.14353755116462708, 'timestamp': '2025-09-30 22:25:06.407394', 'step': 10553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.440076', 'step': 10553, 'epoch': 2} {'type': 'loss', 'content': 0.07988899201154709, 'timestamp': '2025-09-30 22:25:06.449273', 'step': 10554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:06.483896', 'step': 10554, 'epoch': 2} {'type': 'loss', 'content': 0.11036217212677002, 'timestamp': '2025-09-30 22:25:06.486332', 'step': 10555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:06.521261', 'step': 10555, 'epoch': 2} {'type': 'loss', 'content': 0.11400562524795532, 'timestamp': '2025-09-30 22:25:06.546726', 'step': 10556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:06.578945', 'step': 10556, 'epoch': 2} {'type': 'loss', 'content': 0.08287554979324341, 'timestamp': '2025-09-30 22:25:06.583236', 'step': 10557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.614454', 'step': 10557, 'epoch': 2} {'type': 'loss', 'content': 0.09288779646158218, 'timestamp': '2025-09-30 22:25:06.618697', 'step': 10558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.651711', 'step': 10558, 'epoch': 2} {'type': 'loss', 'content': 0.08704866468906403, 'timestamp': '2025-09-30 22:25:06.656046', 'step': 10559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:06.689102', 'step': 10559, 'epoch': 2} {'type': 'loss', 'content': 0.10455723851919174, 'timestamp': '2025-09-30 22:25:06.714938', 'step': 10560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:06.748299', 'step': 10560, 'epoch': 2} {'type': 'loss', 'content': 0.146672323346138, 'timestamp': '2025-09-30 22:25:06.761896', 'step': 10561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.798501', 'step': 10561, 'epoch': 2} {'type': 'loss', 'content': 0.07202059030532837, 'timestamp': '2025-09-30 22:25:06.802516', 'step': 10562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:06.833794', 'step': 10562, 'epoch': 2} {'type': 'loss', 'content': 0.18877466022968292, 'timestamp': '2025-09-30 22:25:06.837359', 'step': 10563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:06.869044', 'step': 10563, 'epoch': 2} {'type': 'loss', 'content': 0.08890995383262634, 'timestamp': '2025-09-30 22:25:06.900592', 'step': 10564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:06.932562', 'step': 10564, 'epoch': 2} {'type': 'loss', 'content': 0.11110008507966995, 'timestamp': '2025-09-30 22:25:06.937265', 'step': 10565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:06.980636', 'step': 10565, 'epoch': 2} {'type': 'loss', 'content': 0.11058369278907776, 'timestamp': '2025-09-30 22:25:06.984841', 'step': 10566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:07.017324', 'step': 10566, 'epoch': 2} {'type': 'loss', 'content': 0.09930919855833054, 'timestamp': '2025-09-30 22:25:07.026464', 'step': 10567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:07.061142', 'step': 10567, 'epoch': 2} {'type': 'loss', 'content': 0.09056130796670914, 'timestamp': '2025-09-30 22:25:07.087130', 'step': 10568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:07.117792', 'step': 10568, 'epoch': 2} {'type': 'loss', 'content': 0.05806071683764458, 'timestamp': '2025-09-30 22:25:07.129761', 'step': 10569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:07.161654', 'step': 10569, 'epoch': 2} {'type': 'loss', 'content': 0.07224415242671967, 'timestamp': '2025-09-30 22:25:07.165498', 'step': 10570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:07.197580', 'step': 10570, 'epoch': 2} {'type': 'loss', 'content': 0.08444338291883469, 'timestamp': '2025-09-30 22:25:07.200615', 'step': 10571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:07.232900', 'step': 10571, 'epoch': 2} {'type': 'loss', 'content': 0.1508605033159256, 'timestamp': '2025-09-30 22:25:07.262333', 'step': 10572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:07.295882', 'step': 10572, 'epoch': 2} {'type': 'loss', 'content': 0.12435995787382126, 'timestamp': '2025-09-30 22:25:07.298680', 'step': 10573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:07.332793', 'step': 10573, 'epoch': 2} {'type': 'loss', 'content': 0.12394603341817856, 'timestamp': '2025-09-30 22:25:07.337393', 'step': 10574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:07.369160', 'step': 10574, 'epoch': 2} {'type': 'loss', 'content': 0.10644099861383438, 'timestamp': '2025-09-30 22:25:07.372711', 'step': 10575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:07.404536', 'step': 10575, 'epoch': 2} {'type': 'loss', 'content': 0.1890045404434204, 'timestamp': '2025-09-30 22:25:07.429289', 'step': 10576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:07.464725', 'step': 10576, 'epoch': 2} {'type': 'loss', 'content': 0.16053281724452972, 'timestamp': '2025-09-30 22:25:07.468620', 'step': 10577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:07.503159', 'step': 10577, 'epoch': 2} {'type': 'loss', 'content': 0.056890565901994705, 'timestamp': '2025-09-30 22:25:07.508235', 'step': 10578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:07.540233', 'step': 10578, 'epoch': 2} {'type': 'loss', 'content': 0.10640083253383636, 'timestamp': '2025-09-30 22:25:07.543631', 'step': 10579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:07.575961', 'step': 10579, 'epoch': 2} {'type': 'loss', 'content': 0.008555502630770206, 'timestamp': '2025-09-30 22:25:07.602364', 'step': 10580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:07.638004', 'step': 10580, 'epoch': 2} {'type': 'loss', 'content': 0.07938677072525024, 'timestamp': '2025-09-30 22:25:07.642427', 'step': 10581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:07.675699', 'step': 10581, 'epoch': 2} {'type': 'loss', 'content': 0.11943379789590836, 'timestamp': '2025-09-30 22:25:07.678229', 'step': 10582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:07.710017', 'step': 10582, 'epoch': 2} {'type': 'loss', 'content': 0.06478579342365265, 'timestamp': '2025-09-30 22:25:07.713867', 'step': 10583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:07.745900', 'step': 10583, 'epoch': 2} {'type': 'loss', 'content': 0.06526175141334534, 'timestamp': '2025-09-30 22:25:07.781088', 'step': 10584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:07.814287', 'step': 10584, 'epoch': 2} {'type': 'loss', 'content': 0.10902877151966095, 'timestamp': '2025-09-30 22:25:07.818451', 'step': 10585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:07.850073', 'step': 10585, 'epoch': 2} {'type': 'loss', 'content': 0.12198144942522049, 'timestamp': '2025-09-30 22:25:07.852696', 'step': 10586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:07.885347', 'step': 10586, 'epoch': 2} {'type': 'loss', 'content': 0.0684836208820343, 'timestamp': '2025-09-30 22:25:07.892745', 'step': 10587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:07.928186', 'step': 10587, 'epoch': 2} {'type': 'loss', 'content': 0.07430588454008102, 'timestamp': '2025-09-30 22:25:07.953428', 'step': 10588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:07.985280', 'step': 10588, 'epoch': 2} {'type': 'loss', 'content': 0.09725084155797958, 'timestamp': '2025-09-30 22:25:07.989749', 'step': 10589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.020790', 'step': 10589, 'epoch': 2} {'type': 'loss', 'content': 0.12297279387712479, 'timestamp': '2025-09-30 22:25:08.027462', 'step': 10590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.062606', 'step': 10590, 'epoch': 2} {'type': 'loss', 'content': 0.11719251424074173, 'timestamp': '2025-09-30 22:25:08.070459', 'step': 10591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:08.113032', 'step': 10591, 'epoch': 2} {'type': 'loss', 'content': 0.12152102589607239, 'timestamp': '2025-09-30 22:25:08.138714', 'step': 10592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.168857', 'step': 10592, 'epoch': 2} {'type': 'loss', 'content': 0.15659412741661072, 'timestamp': '2025-09-30 22:25:08.172121', 'step': 10593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.202954', 'step': 10593, 'epoch': 2} {'type': 'loss', 'content': 0.08742550015449524, 'timestamp': '2025-09-30 22:25:08.206677', 'step': 10594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.240266', 'step': 10594, 'epoch': 2} {'type': 'loss', 'content': 0.12332137674093246, 'timestamp': '2025-09-30 22:25:08.244640', 'step': 10595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.276206', 'step': 10595, 'epoch': 2} {'type': 'loss', 'content': 0.13718050718307495, 'timestamp': '2025-09-30 22:25:08.302705', 'step': 10596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:08.336369', 'step': 10596, 'epoch': 2} {'type': 'loss', 'content': 0.08962775021791458, 'timestamp': '2025-09-30 22:25:08.341515', 'step': 10597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.372494', 'step': 10597, 'epoch': 2} {'type': 'loss', 'content': 0.09763681143522263, 'timestamp': '2025-09-30 22:25:08.375840', 'step': 10598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.407265', 'step': 10598, 'epoch': 2} {'type': 'loss', 'content': 0.062455467879772186, 'timestamp': '2025-09-30 22:25:08.410499', 'step': 10599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:08.440708', 'step': 10599, 'epoch': 2} {'type': 'loss', 'content': 0.16289407014846802, 'timestamp': '2025-09-30 22:25:08.467170', 'step': 10600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:08.498507', 'step': 10600, 'epoch': 2} {'type': 'loss', 'content': 0.20375752449035645, 'timestamp': '2025-09-30 22:25:08.502343', 'step': 10601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.533363', 'step': 10601, 'epoch': 2} {'type': 'loss', 'content': 0.06480934470891953, 'timestamp': '2025-09-30 22:25:08.537673', 'step': 10602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.575526', 'step': 10602, 'epoch': 2} {'type': 'loss', 'content': 0.1463305950164795, 'timestamp': '2025-09-30 22:25:08.578524', 'step': 10603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:08.610268', 'step': 10603, 'epoch': 2} {'type': 'loss', 'content': 0.07891964912414551, 'timestamp': '2025-09-30 22:25:08.648141', 'step': 10604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:08.678941', 'step': 10604, 'epoch': 2} {'type': 'loss', 'content': 0.1315867155790329, 'timestamp': '2025-09-30 22:25:08.683506', 'step': 10605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.716174', 'step': 10605, 'epoch': 2} {'type': 'loss', 'content': 0.1629909873008728, 'timestamp': '2025-09-30 22:25:08.728168', 'step': 10606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.768267', 'step': 10606, 'epoch': 2} {'type': 'loss', 'content': 0.10485223680734634, 'timestamp': '2025-09-30 22:25:08.771610', 'step': 10607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:08.803216', 'step': 10607, 'epoch': 2} {'type': 'loss', 'content': 0.15368662774562836, 'timestamp': '2025-09-30 22:25:08.828497', 'step': 10608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:08.861091', 'step': 10608, 'epoch': 2} {'type': 'loss', 'content': 0.15485301613807678, 'timestamp': '2025-09-30 22:25:08.866050', 'step': 10609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.896852', 'step': 10609, 'epoch': 2} {'type': 'loss', 'content': 0.1119665578007698, 'timestamp': '2025-09-30 22:25:08.899856', 'step': 10610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:08.930529', 'step': 10610, 'epoch': 2} {'type': 'loss', 'content': 0.18229621648788452, 'timestamp': '2025-09-30 22:25:08.934907', 'step': 10611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:08.968710', 'step': 10611, 'epoch': 2} {'type': 'loss', 'content': 0.09149685502052307, 'timestamp': '2025-09-30 22:25:08.993370', 'step': 10612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:09.024571', 'step': 10612, 'epoch': 2} {'type': 'loss', 'content': 0.12933696806430817, 'timestamp': '2025-09-30 22:25:09.029401', 'step': 10613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.078830', 'step': 10613, 'epoch': 2} {'type': 'loss', 'content': 0.11132094264030457, 'timestamp': '2025-09-30 22:25:09.081553', 'step': 10614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:09.113037', 'step': 10614, 'epoch': 2} {'type': 'loss', 'content': 0.15300731360912323, 'timestamp': '2025-09-30 22:25:09.117974', 'step': 10615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.152073', 'step': 10615, 'epoch': 2} {'type': 'loss', 'content': 0.13122424483299255, 'timestamp': '2025-09-30 22:25:09.179299', 'step': 10616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:09.211314', 'step': 10616, 'epoch': 2} {'type': 'loss', 'content': 0.11252675205469131, 'timestamp': '2025-09-30 22:25:09.216449', 'step': 10617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.249160', 'step': 10617, 'epoch': 2} {'type': 'loss', 'content': 0.10199201107025146, 'timestamp': '2025-09-30 22:25:09.253646', 'step': 10618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:09.284718', 'step': 10618, 'epoch': 2} {'type': 'loss', 'content': 0.0766703262925148, 'timestamp': '2025-09-30 22:25:09.300983', 'step': 10619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.341322', 'step': 10619, 'epoch': 2} {'type': 'loss', 'content': 0.1365034431219101, 'timestamp': '2025-09-30 22:25:09.374954', 'step': 10620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:09.413171', 'step': 10620, 'epoch': 2} {'type': 'loss', 'content': 0.14014075696468353, 'timestamp': '2025-09-30 22:25:09.418101', 'step': 10621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.451313', 'step': 10621, 'epoch': 2} {'type': 'loss', 'content': 0.09678902477025986, 'timestamp': '2025-09-30 22:25:09.454726', 'step': 10622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:09.487332', 'step': 10622, 'epoch': 2} {'type': 'loss', 'content': 0.16021768748760223, 'timestamp': '2025-09-30 22:25:09.489649', 'step': 10623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:09.521429', 'step': 10623, 'epoch': 2} {'type': 'loss', 'content': 0.15130119025707245, 'timestamp': '2025-09-30 22:25:09.548153', 'step': 10624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:09.582126', 'step': 10624, 'epoch': 2} {'type': 'loss', 'content': 0.16383907198905945, 'timestamp': '2025-09-30 22:25:09.591868', 'step': 10625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:09.629288', 'step': 10625, 'epoch': 2} {'type': 'loss', 'content': 0.11566270142793655, 'timestamp': '2025-09-30 22:25:09.633964', 'step': 10626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.666525', 'step': 10626, 'epoch': 2} {'type': 'loss', 'content': 0.1711803376674652, 'timestamp': '2025-09-30 22:25:09.670538', 'step': 10627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:09.702366', 'step': 10627, 'epoch': 2} {'type': 'loss', 'content': 0.20168617367744446, 'timestamp': '2025-09-30 22:25:09.729302', 'step': 10628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:09.761843', 'step': 10628, 'epoch': 2} {'type': 'loss', 'content': 0.0662473514676094, 'timestamp': '2025-09-30 22:25:09.764257', 'step': 10629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.797906', 'step': 10629, 'epoch': 2} {'type': 'loss', 'content': 0.08486562967300415, 'timestamp': '2025-09-30 22:25:09.800987', 'step': 10630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.835239', 'step': 10630, 'epoch': 2} {'type': 'loss', 'content': 0.09349538385868073, 'timestamp': '2025-09-30 22:25:09.839144', 'step': 10631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:09.870819', 'step': 10631, 'epoch': 2} {'type': 'loss', 'content': 0.14623107016086578, 'timestamp': '2025-09-30 22:25:09.897488', 'step': 10632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:09.930224', 'step': 10632, 'epoch': 2} {'type': 'loss', 'content': 0.13749678432941437, 'timestamp': '2025-09-30 22:25:09.935296', 'step': 10633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:09.974397', 'step': 10633, 'epoch': 2} {'type': 'loss', 'content': 0.12869079411029816, 'timestamp': '2025-09-30 22:25:09.978878', 'step': 10634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:10.012169', 'step': 10634, 'epoch': 2} {'type': 'loss', 'content': 0.1358197033405304, 'timestamp': '2025-09-30 22:25:10.017458', 'step': 10635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.050011', 'step': 10635, 'epoch': 2} {'type': 'loss', 'content': 0.1570400893688202, 'timestamp': '2025-09-30 22:25:10.074425', 'step': 10636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.108162', 'step': 10636, 'epoch': 2} {'type': 'loss', 'content': 0.12117302417755127, 'timestamp': '2025-09-30 22:25:10.112479', 'step': 10637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.144079', 'step': 10637, 'epoch': 2} {'type': 'loss', 'content': 0.15918640792369843, 'timestamp': '2025-09-30 22:25:10.148346', 'step': 10638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.180612', 'step': 10638, 'epoch': 2} {'type': 'loss', 'content': 0.15486466884613037, 'timestamp': '2025-09-30 22:25:10.185971', 'step': 10639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.217580', 'step': 10639, 'epoch': 2} {'type': 'loss', 'content': 0.0760781466960907, 'timestamp': '2025-09-30 22:25:10.242642', 'step': 10640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:10.278046', 'step': 10640, 'epoch': 2} {'type': 'loss', 'content': 0.11178016662597656, 'timestamp': '2025-09-30 22:25:10.282478', 'step': 10641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.323340', 'step': 10641, 'epoch': 2} {'type': 'loss', 'content': 0.07138808071613312, 'timestamp': '2025-09-30 22:25:10.329600', 'step': 10642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.367275', 'step': 10642, 'epoch': 2} {'type': 'loss', 'content': 0.09693654626607895, 'timestamp': '2025-09-30 22:25:10.372716', 'step': 10643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.406838', 'step': 10643, 'epoch': 2} {'type': 'loss', 'content': 0.12934958934783936, 'timestamp': '2025-09-30 22:25:10.430914', 'step': 10644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:10.463097', 'step': 10644, 'epoch': 2} {'type': 'loss', 'content': 0.20382094383239746, 'timestamp': '2025-09-30 22:25:10.465495', 'step': 10645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.496493', 'step': 10645, 'epoch': 2} {'type': 'loss', 'content': 0.10536687821149826, 'timestamp': '2025-09-30 22:25:10.498803', 'step': 10646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.529236', 'step': 10646, 'epoch': 2} {'type': 'loss', 'content': 0.11802433431148529, 'timestamp': '2025-09-30 22:25:10.531906', 'step': 10647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:10.567098', 'step': 10647, 'epoch': 2} {'type': 'loss', 'content': 0.1442813277244568, 'timestamp': '2025-09-30 22:25:10.592820', 'step': 10648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.629055', 'step': 10648, 'epoch': 2} {'type': 'loss', 'content': 0.09825356304645538, 'timestamp': '2025-09-30 22:25:10.632579', 'step': 10649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.664092', 'step': 10649, 'epoch': 2} {'type': 'loss', 'content': 0.055768609046936035, 'timestamp': '2025-09-30 22:25:10.670074', 'step': 10650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.700281', 'step': 10650, 'epoch': 2} {'type': 'loss', 'content': 0.08964444696903229, 'timestamp': '2025-09-30 22:25:10.707506', 'step': 10651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:10.742171', 'step': 10651, 'epoch': 2} {'type': 'loss', 'content': 0.08773647248744965, 'timestamp': '2025-09-30 22:25:10.770601', 'step': 10652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:10.809922', 'step': 10652, 'epoch': 2} {'type': 'loss', 'content': 0.07989878952503204, 'timestamp': '2025-09-30 22:25:10.818976', 'step': 10653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:10.851092', 'step': 10653, 'epoch': 2} {'type': 'loss', 'content': 0.13729161024093628, 'timestamp': '2025-09-30 22:25:10.854418', 'step': 10654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.886133', 'step': 10654, 'epoch': 2} {'type': 'loss', 'content': 0.0847226157784462, 'timestamp': '2025-09-30 22:25:10.901403', 'step': 10655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.939869', 'step': 10655, 'epoch': 2} {'type': 'loss', 'content': 0.08009254187345505, 'timestamp': '2025-09-30 22:25:10.967192', 'step': 10656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:10.999017', 'step': 10656, 'epoch': 2} {'type': 'loss', 'content': 0.038467854261398315, 'timestamp': '2025-09-30 22:25:11.001412', 'step': 10657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:11.031765', 'step': 10657, 'epoch': 2} {'type': 'loss', 'content': 0.06791479140520096, 'timestamp': '2025-09-30 22:25:11.034901', 'step': 10658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.066187', 'step': 10658, 'epoch': 2} {'type': 'loss', 'content': 0.1313467174768448, 'timestamp': '2025-09-30 22:25:11.069398', 'step': 10659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.100336', 'step': 10659, 'epoch': 2} {'type': 'loss', 'content': 0.08624545484781265, 'timestamp': '2025-09-30 22:25:11.124720', 'step': 10660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.155363', 'step': 10660, 'epoch': 2} {'type': 'loss', 'content': 0.12436000257730484, 'timestamp': '2025-09-30 22:25:11.158449', 'step': 10661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.192417', 'step': 10661, 'epoch': 2} {'type': 'loss', 'content': 0.14992275834083557, 'timestamp': '2025-09-30 22:25:11.195342', 'step': 10662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.226286', 'step': 10662, 'epoch': 2} {'type': 'loss', 'content': 0.1663396656513214, 'timestamp': '2025-09-30 22:25:11.236173', 'step': 10663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.274443', 'step': 10663, 'epoch': 2} {'type': 'loss', 'content': 0.1894920915365219, 'timestamp': '2025-09-30 22:25:11.300256', 'step': 10664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.339756', 'step': 10664, 'epoch': 2} {'type': 'loss', 'content': 0.0741213858127594, 'timestamp': '2025-09-30 22:25:11.350008', 'step': 10665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.391714', 'step': 10665, 'epoch': 2} {'type': 'loss', 'content': 0.09974358975887299, 'timestamp': '2025-09-30 22:25:11.395640', 'step': 10666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:11.428276', 'step': 10666, 'epoch': 2} {'type': 'loss', 'content': 0.12130285799503326, 'timestamp': '2025-09-30 22:25:11.433291', 'step': 10667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:11.468167', 'step': 10667, 'epoch': 2} {'type': 'loss', 'content': 0.07882536202669144, 'timestamp': '2025-09-30 22:25:11.494768', 'step': 10668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.532171', 'step': 10668, 'epoch': 2} {'type': 'loss', 'content': 0.062265630811452866, 'timestamp': '2025-09-30 22:25:11.536906', 'step': 10669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.570734', 'step': 10669, 'epoch': 2} {'type': 'loss', 'content': 0.1492748111486435, 'timestamp': '2025-09-30 22:25:11.573931', 'step': 10670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.604614', 'step': 10670, 'epoch': 2} {'type': 'loss', 'content': 0.13145849108695984, 'timestamp': '2025-09-30 22:25:11.608501', 'step': 10671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:11.640420', 'step': 10671, 'epoch': 2} {'type': 'loss', 'content': 0.06422200053930283, 'timestamp': '2025-09-30 22:25:11.666735', 'step': 10672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.697965', 'step': 10672, 'epoch': 2} {'type': 'loss', 'content': 0.10382358729839325, 'timestamp': '2025-09-30 22:25:11.707413', 'step': 10673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.744267', 'step': 10673, 'epoch': 2} {'type': 'loss', 'content': 0.10822081565856934, 'timestamp': '2025-09-30 22:25:11.747320', 'step': 10674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.785713', 'step': 10674, 'epoch': 2} {'type': 'loss', 'content': 0.13034139573574066, 'timestamp': '2025-09-30 22:25:11.789917', 'step': 10675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.830734', 'step': 10675, 'epoch': 2} {'type': 'loss', 'content': 0.1154022216796875, 'timestamp': '2025-09-30 22:25:11.856028', 'step': 10676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:11.888000', 'step': 10676, 'epoch': 2} {'type': 'loss', 'content': 0.18675394356250763, 'timestamp': '2025-09-30 22:25:11.903643', 'step': 10677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:11.941399', 'step': 10677, 'epoch': 2} {'type': 'loss', 'content': 0.08728231489658356, 'timestamp': '2025-09-30 22:25:11.948319', 'step': 10678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:11.981591', 'step': 10678, 'epoch': 2} {'type': 'loss', 'content': 0.13723263144493103, 'timestamp': '2025-09-30 22:25:11.987982', 'step': 10679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.019258', 'step': 10679, 'epoch': 2} {'type': 'loss', 'content': 0.10116509348154068, 'timestamp': '2025-09-30 22:25:12.048870', 'step': 10680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.079167', 'step': 10680, 'epoch': 2} {'type': 'loss', 'content': 0.15761122107505798, 'timestamp': '2025-09-30 22:25:12.082449', 'step': 10681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:12.119889', 'step': 10681, 'epoch': 2} {'type': 'loss', 'content': 0.12528860569000244, 'timestamp': '2025-09-30 22:25:12.124232', 'step': 10682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.157082', 'step': 10682, 'epoch': 2} {'type': 'loss', 'content': 0.1616000086069107, 'timestamp': '2025-09-30 22:25:12.160467', 'step': 10683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:12.194008', 'step': 10683, 'epoch': 2} {'type': 'loss', 'content': 0.13080140948295593, 'timestamp': '2025-09-30 22:25:12.219613', 'step': 10684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:12.251340', 'step': 10684, 'epoch': 2} {'type': 'loss', 'content': 0.07259007543325424, 'timestamp': '2025-09-30 22:25:12.257695', 'step': 10685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:12.289325', 'step': 10685, 'epoch': 2} {'type': 'loss', 'content': 0.07989095151424408, 'timestamp': '2025-09-30 22:25:12.295024', 'step': 10686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:12.327630', 'step': 10686, 'epoch': 2} {'type': 'loss', 'content': 0.16790415346622467, 'timestamp': '2025-09-30 22:25:12.331493', 'step': 10687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:12.373851', 'step': 10687, 'epoch': 2} {'type': 'loss', 'content': 0.18266916275024414, 'timestamp': '2025-09-30 22:25:12.405375', 'step': 10688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.446589', 'step': 10688, 'epoch': 2} {'type': 'loss', 'content': 0.07870391011238098, 'timestamp': '2025-09-30 22:25:12.452838', 'step': 10689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:12.487425', 'step': 10689, 'epoch': 2} {'type': 'loss', 'content': 0.11830596625804901, 'timestamp': '2025-09-30 22:25:12.492801', 'step': 10690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:12.526014', 'step': 10690, 'epoch': 2} {'type': 'loss', 'content': 0.09614834934473038, 'timestamp': '2025-09-30 22:25:12.537784', 'step': 10691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.569786', 'step': 10691, 'epoch': 2} {'type': 'loss', 'content': 0.11736669391393661, 'timestamp': '2025-09-30 22:25:12.594616', 'step': 10692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:12.625927', 'step': 10692, 'epoch': 2} {'type': 'loss', 'content': 0.05422321707010269, 'timestamp': '2025-09-30 22:25:12.631002', 'step': 10693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:12.662934', 'step': 10693, 'epoch': 2} {'type': 'loss', 'content': 0.079582579433918, 'timestamp': '2025-09-30 22:25:12.670471', 'step': 10694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:12.701566', 'step': 10694, 'epoch': 2} {'type': 'loss', 'content': 0.19278134405612946, 'timestamp': '2025-09-30 22:25:12.713838', 'step': 10695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:12.745521', 'step': 10695, 'epoch': 2} {'type': 'loss', 'content': 0.0921994298696518, 'timestamp': '2025-09-30 22:25:12.774147', 'step': 10696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.807794', 'step': 10696, 'epoch': 2} {'type': 'loss', 'content': 0.1309324949979782, 'timestamp': '2025-09-30 22:25:12.811957', 'step': 10697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:12.845103', 'step': 10697, 'epoch': 2} {'type': 'loss', 'content': 0.14012432098388672, 'timestamp': '2025-09-30 22:25:12.848701', 'step': 10698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:12.880163', 'step': 10698, 'epoch': 2} {'type': 'loss', 'content': 0.08444226533174515, 'timestamp': '2025-09-30 22:25:12.892580', 'step': 10699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:12.928546', 'step': 10699, 'epoch': 2} {'type': 'loss', 'content': 0.08311764150857925, 'timestamp': '2025-09-30 22:25:12.963272', 'step': 10700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:12.995540', 'step': 10700, 'epoch': 2} {'type': 'loss', 'content': 0.09524223953485489, 'timestamp': '2025-09-30 22:25:13.001337', 'step': 10701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:13.034633', 'step': 10701, 'epoch': 2} {'type': 'loss', 'content': 0.18328183889389038, 'timestamp': '2025-09-30 22:25:13.038253', 'step': 10702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:13.070254', 'step': 10702, 'epoch': 2} {'type': 'loss', 'content': 0.0704549103975296, 'timestamp': '2025-09-30 22:25:13.075222', 'step': 10703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:13.108046', 'step': 10703, 'epoch': 2} {'type': 'loss', 'content': 0.16042058169841766, 'timestamp': '2025-09-30 22:25:13.134801', 'step': 10704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:13.166821', 'step': 10704, 'epoch': 2} {'type': 'loss', 'content': 0.08084318786859512, 'timestamp': '2025-09-30 22:25:13.170185', 'step': 10705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:13.201440', 'step': 10705, 'epoch': 2} {'type': 'loss', 'content': 0.1708829402923584, 'timestamp': '2025-09-30 22:25:13.208505', 'step': 10706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:13.241270', 'step': 10706, 'epoch': 2} {'type': 'loss', 'content': 0.032674264162778854, 'timestamp': '2025-09-30 22:25:13.254064', 'step': 10707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:13.289711', 'step': 10707, 'epoch': 2} {'type': 'loss', 'content': 0.19187714159488678, 'timestamp': '2025-09-30 22:25:13.319282', 'step': 10708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:13.354102', 'step': 10708, 'epoch': 2} {'type': 'loss', 'content': 0.15959173440933228, 'timestamp': '2025-09-30 22:25:13.370146', 'step': 10709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:13.411546', 'step': 10709, 'epoch': 2} {'type': 'loss', 'content': 0.09466979652643204, 'timestamp': '2025-09-30 22:25:13.416979', 'step': 10710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:13.449442', 'step': 10710, 'epoch': 2} {'type': 'loss', 'content': 0.07711127400398254, 'timestamp': '2025-09-30 22:25:13.453678', 'step': 10711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:13.496470', 'step': 10711, 'epoch': 2} {'type': 'loss', 'content': 0.07573001086711884, 'timestamp': '2025-09-30 22:25:13.529814', 'step': 10712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:13.564693', 'step': 10712, 'epoch': 2} {'type': 'loss', 'content': 0.07357250899076462, 'timestamp': '2025-09-30 22:25:13.567441', 'step': 10713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:13.599465', 'step': 10713, 'epoch': 2} {'type': 'loss', 'content': 0.09788331389427185, 'timestamp': '2025-09-30 22:25:13.601591', 'step': 10714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:13.633192', 'step': 10714, 'epoch': 2} {'type': 'loss', 'content': 0.08021551370620728, 'timestamp': '2025-09-30 22:25:13.639533', 'step': 10715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:13.671457', 'step': 10715, 'epoch': 2} {'type': 'loss', 'content': 0.07809693366289139, 'timestamp': '2025-09-30 22:25:13.698243', 'step': 10716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:13.728651', 'step': 10716, 'epoch': 2} {'type': 'loss', 'content': 0.0994933620095253, 'timestamp': '2025-09-30 22:25:13.732031', 'step': 10717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:13.763385', 'step': 10717, 'epoch': 2} {'type': 'loss', 'content': 0.10269234329462051, 'timestamp': '2025-09-30 22:25:13.766255', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:25:21.664811', 'step': 10718, 'epoch': 2} {'type': 'pplx', 'content': 13220.693574069439, 'timestamp': '2025-09-30 22:25:21.669684', 'step': 10718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:21.700304', 'step': 10718, 'epoch': 2} {'type': 'loss', 'content': 0.14837172627449036, 'timestamp': '2025-09-30 22:25:21.705810', 'step': 10719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:21.752253', 'step': 10719, 'epoch': 2} {'type': 'loss', 'content': 0.07926132529973984, 'timestamp': '2025-09-30 22:25:21.779000', 'step': 10720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:21.809928', 'step': 10720, 'epoch': 2} {'type': 'loss', 'content': 0.14922958612442017, 'timestamp': '2025-09-30 22:25:21.815027', 'step': 10721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:21.856035', 'step': 10721, 'epoch': 2} {'type': 'loss', 'content': 0.03998414799571037, 'timestamp': '2025-09-30 22:25:21.862137', 'step': 10722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:21.910610', 'step': 10722, 'epoch': 2} {'type': 'loss', 'content': 0.11415161192417145, 'timestamp': '2025-09-30 22:25:21.916578', 'step': 10723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:21.951651', 'step': 10723, 'epoch': 2} {'type': 'loss', 'content': 0.09789487719535828, 'timestamp': '2025-09-30 22:25:21.976828', 'step': 10724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:22.026172', 'step': 10724, 'epoch': 2} {'type': 'loss', 'content': 0.114072784781456, 'timestamp': '2025-09-30 22:25:22.029496', 'step': 10725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.063768', 'step': 10725, 'epoch': 2} {'type': 'loss', 'content': 0.19902318716049194, 'timestamp': '2025-09-30 22:25:22.067972', 'step': 10726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.099156', 'step': 10726, 'epoch': 2} {'type': 'loss', 'content': 0.20332497358322144, 'timestamp': '2025-09-30 22:25:22.104561', 'step': 10727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.137530', 'step': 10727, 'epoch': 2} {'type': 'loss', 'content': 0.10772035270929337, 'timestamp': '2025-09-30 22:25:22.163266', 'step': 10728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.195360', 'step': 10728, 'epoch': 2} {'type': 'loss', 'content': 0.14735035598278046, 'timestamp': '2025-09-30 22:25:22.201298', 'step': 10729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.233031', 'step': 10729, 'epoch': 2} {'type': 'loss', 'content': 0.0781618058681488, 'timestamp': '2025-09-30 22:25:22.237993', 'step': 10730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.270454', 'step': 10730, 'epoch': 2} {'type': 'loss', 'content': 0.09945705533027649, 'timestamp': '2025-09-30 22:25:22.276045', 'step': 10731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.309375', 'step': 10731, 'epoch': 2} {'type': 'loss', 'content': 0.018760189414024353, 'timestamp': '2025-09-30 22:25:22.335829', 'step': 10732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.367669', 'step': 10732, 'epoch': 2} {'type': 'loss', 'content': 0.10064001381397247, 'timestamp': '2025-09-30 22:25:22.371525', 'step': 10733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:22.407357', 'step': 10733, 'epoch': 2} {'type': 'loss', 'content': 0.10708687454462051, 'timestamp': '2025-09-30 22:25:22.420240', 'step': 10734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:22.462859', 'step': 10734, 'epoch': 2} {'type': 'loss', 'content': 0.04270195588469505, 'timestamp': '2025-09-30 22:25:22.467808', 'step': 10735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.505159', 'step': 10735, 'epoch': 2} {'type': 'loss', 'content': 0.12432147562503815, 'timestamp': '2025-09-30 22:25:22.530973', 'step': 10736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:22.563714', 'step': 10736, 'epoch': 2} {'type': 'loss', 'content': 0.12293296307325363, 'timestamp': '2025-09-30 22:25:22.566822', 'step': 10737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.598737', 'step': 10737, 'epoch': 2} {'type': 'loss', 'content': 0.07785718888044357, 'timestamp': '2025-09-30 22:25:22.613563', 'step': 10738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.646619', 'step': 10738, 'epoch': 2} {'type': 'loss', 'content': 0.1886846125125885, 'timestamp': '2025-09-30 22:25:22.650696', 'step': 10739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:22.681944', 'step': 10739, 'epoch': 2} {'type': 'loss', 'content': 0.11237160116434097, 'timestamp': '2025-09-30 22:25:22.708129', 'step': 10740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:22.748671', 'step': 10740, 'epoch': 2} {'type': 'loss', 'content': 0.06700552254915237, 'timestamp': '2025-09-30 22:25:22.753750', 'step': 10741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:22.787763', 'step': 10741, 'epoch': 2} {'type': 'loss', 'content': 0.05533081293106079, 'timestamp': '2025-09-30 22:25:22.799141', 'step': 10742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.849283', 'step': 10742, 'epoch': 2} {'type': 'loss', 'content': 0.14418403804302216, 'timestamp': '2025-09-30 22:25:22.855612', 'step': 10743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.887805', 'step': 10743, 'epoch': 2} {'type': 'loss', 'content': 0.09461959451436996, 'timestamp': '2025-09-30 22:25:22.912731', 'step': 10744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:22.945066', 'step': 10744, 'epoch': 2} {'type': 'loss', 'content': 0.2092437446117401, 'timestamp': '2025-09-30 22:25:22.949618', 'step': 10745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:22.992275', 'step': 10745, 'epoch': 2} {'type': 'loss', 'content': 0.0801701471209526, 'timestamp': '2025-09-30 22:25:23.008506', 'step': 10746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:23.040067', 'step': 10746, 'epoch': 2} {'type': 'loss', 'content': 0.11018960922956467, 'timestamp': '2025-09-30 22:25:23.048287', 'step': 10747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.085036', 'step': 10747, 'epoch': 2} {'type': 'loss', 'content': 0.15875385701656342, 'timestamp': '2025-09-30 22:25:23.109586', 'step': 10748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:23.143799', 'step': 10748, 'epoch': 2} {'type': 'loss', 'content': 0.1554844081401825, 'timestamp': '2025-09-30 22:25:23.147850', 'step': 10749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.180045', 'step': 10749, 'epoch': 2} {'type': 'loss', 'content': 0.19827255606651306, 'timestamp': '2025-09-30 22:25:23.185299', 'step': 10750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.218392', 'step': 10750, 'epoch': 2} {'type': 'loss', 'content': 0.12123279273509979, 'timestamp': '2025-09-30 22:25:23.222705', 'step': 10751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:23.268617', 'step': 10751, 'epoch': 2} {'type': 'loss', 'content': 0.023118529468774796, 'timestamp': '2025-09-30 22:25:23.300976', 'step': 10752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:23.335671', 'step': 10752, 'epoch': 2} {'type': 'loss', 'content': 0.17580169439315796, 'timestamp': '2025-09-30 22:25:23.339295', 'step': 10753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:23.377686', 'step': 10753, 'epoch': 2} {'type': 'loss', 'content': 0.16089816391468048, 'timestamp': '2025-09-30 22:25:23.381246', 'step': 10754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:23.413716', 'step': 10754, 'epoch': 2} {'type': 'loss', 'content': 0.09274768829345703, 'timestamp': '2025-09-30 22:25:23.417845', 'step': 10755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:23.477096', 'step': 10755, 'epoch': 2} {'type': 'loss', 'content': 0.09481758624315262, 'timestamp': '2025-09-30 22:25:23.502128', 'step': 10756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:23.555632', 'step': 10756, 'epoch': 2} {'type': 'loss', 'content': 0.09303869307041168, 'timestamp': '2025-09-30 22:25:23.559562', 'step': 10757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:23.595718', 'step': 10757, 'epoch': 2} {'type': 'loss', 'content': 0.1320420354604721, 'timestamp': '2025-09-30 22:25:23.604294', 'step': 10758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.636773', 'step': 10758, 'epoch': 2} {'type': 'loss', 'content': 0.05557439103722572, 'timestamp': '2025-09-30 22:25:23.642728', 'step': 10759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:23.687838', 'step': 10759, 'epoch': 2} {'type': 'loss', 'content': 0.09960471093654633, 'timestamp': '2025-09-30 22:25:23.715427', 'step': 10760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.754050', 'step': 10760, 'epoch': 2} {'type': 'loss', 'content': 0.05828344449400902, 'timestamp': '2025-09-30 22:25:23.766331', 'step': 10761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.797921', 'step': 10761, 'epoch': 2} {'type': 'loss', 'content': 0.1966668963432312, 'timestamp': '2025-09-30 22:25:23.801625', 'step': 10762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.831605', 'step': 10762, 'epoch': 2} {'type': 'loss', 'content': 0.10513468086719513, 'timestamp': '2025-09-30 22:25:23.836211', 'step': 10763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:23.878658', 'step': 10763, 'epoch': 2} {'type': 'loss', 'content': 0.06021950766444206, 'timestamp': '2025-09-30 22:25:23.905008', 'step': 10764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:23.940814', 'step': 10764, 'epoch': 2} {'type': 'loss', 'content': 0.17733274400234222, 'timestamp': '2025-09-30 22:25:23.955352', 'step': 10765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:23.987397', 'step': 10765, 'epoch': 2} {'type': 'loss', 'content': 0.04737339913845062, 'timestamp': '2025-09-30 22:25:23.992857', 'step': 10766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:24.026960', 'step': 10766, 'epoch': 2} {'type': 'loss', 'content': 0.13800567388534546, 'timestamp': '2025-09-30 22:25:24.029901', 'step': 10767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:24.063731', 'step': 10767, 'epoch': 2} {'type': 'loss', 'content': 0.10527260601520538, 'timestamp': '2025-09-30 22:25:24.091590', 'step': 10768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.125087', 'step': 10768, 'epoch': 2} {'type': 'loss', 'content': 0.12999680638313293, 'timestamp': '2025-09-30 22:25:24.131057', 'step': 10769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.163867', 'step': 10769, 'epoch': 2} {'type': 'loss', 'content': 0.08472904562950134, 'timestamp': '2025-09-30 22:25:24.176451', 'step': 10770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.220976', 'step': 10770, 'epoch': 2} {'type': 'loss', 'content': 0.1416589319705963, 'timestamp': '2025-09-30 22:25:24.225133', 'step': 10771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:24.257709', 'step': 10771, 'epoch': 2} {'type': 'loss', 'content': 0.08470752090215683, 'timestamp': '2025-09-30 22:25:24.283111', 'step': 10772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:24.315244', 'step': 10772, 'epoch': 2} {'type': 'loss', 'content': 0.07262351363897324, 'timestamp': '2025-09-30 22:25:24.320841', 'step': 10773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:24.352812', 'step': 10773, 'epoch': 2} {'type': 'loss', 'content': 0.11035702377557755, 'timestamp': '2025-09-30 22:25:24.356891', 'step': 10774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:24.389232', 'step': 10774, 'epoch': 2} {'type': 'loss', 'content': 0.10817158222198486, 'timestamp': '2025-09-30 22:25:24.394928', 'step': 10775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.428142', 'step': 10775, 'epoch': 2} {'type': 'loss', 'content': 0.0868636816740036, 'timestamp': '2025-09-30 22:25:24.455384', 'step': 10776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:24.502110', 'step': 10776, 'epoch': 2} {'type': 'loss', 'content': 0.06252977252006531, 'timestamp': '2025-09-30 22:25:24.506049', 'step': 10777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:24.538454', 'step': 10777, 'epoch': 2} {'type': 'loss', 'content': 0.12647873163223267, 'timestamp': '2025-09-30 22:25:24.547936', 'step': 10778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:24.584942', 'step': 10778, 'epoch': 2} {'type': 'loss', 'content': 0.1391354203224182, 'timestamp': '2025-09-30 22:25:24.592723', 'step': 10779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.629752', 'step': 10779, 'epoch': 2} {'type': 'loss', 'content': 0.06506074219942093, 'timestamp': '2025-09-30 22:25:24.657044', 'step': 10780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:24.695513', 'step': 10780, 'epoch': 2} {'type': 'loss', 'content': 0.1463373750448227, 'timestamp': '2025-09-30 22:25:24.709722', 'step': 10781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.754178', 'step': 10781, 'epoch': 2} {'type': 'loss', 'content': 0.07062502205371857, 'timestamp': '2025-09-30 22:25:24.762885', 'step': 10782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:24.796167', 'step': 10782, 'epoch': 2} {'type': 'loss', 'content': 0.05456911772489548, 'timestamp': '2025-09-30 22:25:24.805128', 'step': 10783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:24.837595', 'step': 10783, 'epoch': 2} {'type': 'loss', 'content': 0.06907746940851212, 'timestamp': '2025-09-30 22:25:24.863168', 'step': 10784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:24.896702', 'step': 10784, 'epoch': 2} {'type': 'loss', 'content': 0.24396198987960815, 'timestamp': '2025-09-30 22:25:24.903655', 'step': 10785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:24.942362', 'step': 10785, 'epoch': 2} {'type': 'loss', 'content': 0.06602204591035843, 'timestamp': '2025-09-30 22:25:24.945333', 'step': 10786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:24.977015', 'step': 10786, 'epoch': 2} {'type': 'loss', 'content': 0.11066734045743942, 'timestamp': '2025-09-30 22:25:24.985037', 'step': 10787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:25.025010', 'step': 10787, 'epoch': 2} {'type': 'loss', 'content': 0.14946815371513367, 'timestamp': '2025-09-30 22:25:25.050664', 'step': 10788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.090961', 'step': 10788, 'epoch': 2} {'type': 'loss', 'content': 0.1486932784318924, 'timestamp': '2025-09-30 22:25:25.100634', 'step': 10789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:25.132546', 'step': 10789, 'epoch': 2} {'type': 'loss', 'content': 0.09717484563589096, 'timestamp': '2025-09-30 22:25:25.140123', 'step': 10790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:25.173520', 'step': 10790, 'epoch': 2} {'type': 'loss', 'content': 0.05077734589576721, 'timestamp': '2025-09-30 22:25:25.177499', 'step': 10791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:25.208856', 'step': 10791, 'epoch': 2} {'type': 'loss', 'content': 0.10855815559625626, 'timestamp': '2025-09-30 22:25:25.243513', 'step': 10792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:25.277344', 'step': 10792, 'epoch': 2} {'type': 'loss', 'content': 0.05411365628242493, 'timestamp': '2025-09-30 22:25:25.283774', 'step': 10793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:25.317463', 'step': 10793, 'epoch': 2} {'type': 'loss', 'content': 0.1599612683057785, 'timestamp': '2025-09-30 22:25:25.325772', 'step': 10794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:25.362965', 'step': 10794, 'epoch': 2} {'type': 'loss', 'content': 0.05275620147585869, 'timestamp': '2025-09-30 22:25:25.367735', 'step': 10795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:25.403522', 'step': 10795, 'epoch': 2} {'type': 'loss', 'content': 0.1483713686466217, 'timestamp': '2025-09-30 22:25:25.433045', 'step': 10796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.464729', 'step': 10796, 'epoch': 2} {'type': 'loss', 'content': 0.09879431873559952, 'timestamp': '2025-09-30 22:25:25.467199', 'step': 10797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.499555', 'step': 10797, 'epoch': 2} {'type': 'loss', 'content': 0.08492851257324219, 'timestamp': '2025-09-30 22:25:25.503892', 'step': 10798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:25.536846', 'step': 10798, 'epoch': 2} {'type': 'loss', 'content': 0.1728232055902481, 'timestamp': '2025-09-30 22:25:25.539636', 'step': 10799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-30 22:25:25.579422', 'step': 10799, 'epoch': 2} {'type': 'loss', 'content': 0.09348931908607483, 'timestamp': '2025-09-30 22:25:25.616641', 'step': 10800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.647171', 'step': 10800, 'epoch': 2} {'type': 'loss', 'content': 0.12835173308849335, 'timestamp': '2025-09-30 22:25:25.650147', 'step': 10801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:25.681567', 'step': 10801, 'epoch': 2} {'type': 'loss', 'content': 0.17602373659610748, 'timestamp': '2025-09-30 22:25:25.686456', 'step': 10802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.717397', 'step': 10802, 'epoch': 2} {'type': 'loss', 'content': 0.1352788209915161, 'timestamp': '2025-09-30 22:25:25.720381', 'step': 10803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.751767', 'step': 10803, 'epoch': 2} {'type': 'loss', 'content': 0.18034684658050537, 'timestamp': '2025-09-30 22:25:25.776301', 'step': 10804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:25.807955', 'step': 10804, 'epoch': 2} {'type': 'loss', 'content': 0.07626702636480331, 'timestamp': '2025-09-30 22:25:25.810541', 'step': 10805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:25.841277', 'step': 10805, 'epoch': 2} {'type': 'loss', 'content': 0.13092172145843506, 'timestamp': '2025-09-30 22:25:25.844491', 'step': 10806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:25.875750', 'step': 10806, 'epoch': 2} {'type': 'loss', 'content': 0.09854163229465485, 'timestamp': '2025-09-30 22:25:25.880021', 'step': 10807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.910650', 'step': 10807, 'epoch': 2} {'type': 'loss', 'content': 0.08599095791578293, 'timestamp': '2025-09-30 22:25:25.934996', 'step': 10808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:25.973852', 'step': 10808, 'epoch': 2} {'type': 'loss', 'content': 0.16946950554847717, 'timestamp': '2025-09-30 22:25:25.983760', 'step': 10809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:26.014330', 'step': 10809, 'epoch': 2} {'type': 'loss', 'content': 0.057892296463251114, 'timestamp': '2025-09-30 22:25:26.020936', 'step': 10810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.058581', 'step': 10810, 'epoch': 2} {'type': 'loss', 'content': 0.08880492299795151, 'timestamp': '2025-09-30 22:25:26.061157', 'step': 10811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.091736', 'step': 10811, 'epoch': 2} {'type': 'loss', 'content': 0.07503487914800644, 'timestamp': '2025-09-30 22:25:26.118985', 'step': 10812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:26.157441', 'step': 10812, 'epoch': 2} {'type': 'loss', 'content': 0.07130017131567001, 'timestamp': '2025-09-30 22:25:26.162604', 'step': 10813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:26.195203', 'step': 10813, 'epoch': 2} {'type': 'loss', 'content': 0.13190414011478424, 'timestamp': '2025-09-30 22:25:26.201032', 'step': 10814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:26.238421', 'step': 10814, 'epoch': 2} {'type': 'loss', 'content': 0.1493307650089264, 'timestamp': '2025-09-30 22:25:26.244849', 'step': 10815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:26.279696', 'step': 10815, 'epoch': 2} {'type': 'loss', 'content': 0.11678151786327362, 'timestamp': '2025-09-30 22:25:26.317461', 'step': 10816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.350129', 'step': 10816, 'epoch': 2} {'type': 'loss', 'content': 0.10137542337179184, 'timestamp': '2025-09-30 22:25:26.354476', 'step': 10817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:26.386328', 'step': 10817, 'epoch': 2} {'type': 'loss', 'content': 0.06884016841650009, 'timestamp': '2025-09-30 22:25:26.391349', 'step': 10818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:26.423617', 'step': 10818, 'epoch': 2} {'type': 'loss', 'content': 0.06024348363280296, 'timestamp': '2025-09-30 22:25:26.426419', 'step': 10819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:26.458257', 'step': 10819, 'epoch': 2} {'type': 'loss', 'content': 0.14981359243392944, 'timestamp': '2025-09-30 22:25:26.488839', 'step': 10820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.525885', 'step': 10820, 'epoch': 2} {'type': 'loss', 'content': 0.13461944460868835, 'timestamp': '2025-09-30 22:25:26.528759', 'step': 10821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.560422', 'step': 10821, 'epoch': 2} {'type': 'loss', 'content': 0.13092419505119324, 'timestamp': '2025-09-30 22:25:26.564115', 'step': 10822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:26.601553', 'step': 10822, 'epoch': 2} {'type': 'loss', 'content': 0.0461793914437294, 'timestamp': '2025-09-30 22:25:26.604764', 'step': 10823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.639829', 'step': 10823, 'epoch': 2} {'type': 'loss', 'content': 0.07264254987239838, 'timestamp': '2025-09-30 22:25:26.664128', 'step': 10824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.696531', 'step': 10824, 'epoch': 2} {'type': 'loss', 'content': 0.21585673093795776, 'timestamp': '2025-09-30 22:25:26.700910', 'step': 10825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:26.732544', 'step': 10825, 'epoch': 2} {'type': 'loss', 'content': 0.048854418098926544, 'timestamp': '2025-09-30 22:25:26.737748', 'step': 10826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:26.768738', 'step': 10826, 'epoch': 2} {'type': 'loss', 'content': 0.1180272251367569, 'timestamp': '2025-09-30 22:25:26.774117', 'step': 10827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:26.806796', 'step': 10827, 'epoch': 2} {'type': 'loss', 'content': 0.08170349150896072, 'timestamp': '2025-09-30 22:25:26.832153', 'step': 10828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:26.862568', 'step': 10828, 'epoch': 2} {'type': 'loss', 'content': 0.12508606910705566, 'timestamp': '2025-09-30 22:25:26.864931', 'step': 10829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:26.899185', 'step': 10829, 'epoch': 2} {'type': 'loss', 'content': 0.07737355679273605, 'timestamp': '2025-09-30 22:25:26.904239', 'step': 10830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:26.937334', 'step': 10830, 'epoch': 2} {'type': 'loss', 'content': 0.16753049194812775, 'timestamp': '2025-09-30 22:25:26.942842', 'step': 10831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:26.974750', 'step': 10831, 'epoch': 2} {'type': 'loss', 'content': 0.0786454975605011, 'timestamp': '2025-09-30 22:25:27.005044', 'step': 10832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:27.049080', 'step': 10832, 'epoch': 2} {'type': 'loss', 'content': 0.09653156995773315, 'timestamp': '2025-09-30 22:25:27.052256', 'step': 10833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:27.084739', 'step': 10833, 'epoch': 2} {'type': 'loss', 'content': 0.12226654589176178, 'timestamp': '2025-09-30 22:25:27.090502', 'step': 10834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.123750', 'step': 10834, 'epoch': 2} {'type': 'loss', 'content': 0.07085753977298737, 'timestamp': '2025-09-30 22:25:27.128739', 'step': 10835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.162101', 'step': 10835, 'epoch': 2} {'type': 'loss', 'content': 0.16228950023651123, 'timestamp': '2025-09-30 22:25:27.186757', 'step': 10836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:27.216566', 'step': 10836, 'epoch': 2} {'type': 'loss', 'content': 0.08559525012969971, 'timestamp': '2025-09-30 22:25:27.219532', 'step': 10837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:27.250255', 'step': 10837, 'epoch': 2} {'type': 'loss', 'content': 0.10744347423315048, 'timestamp': '2025-09-30 22:25:27.254345', 'step': 10838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.286294', 'step': 10838, 'epoch': 2} {'type': 'loss', 'content': 0.10253676772117615, 'timestamp': '2025-09-30 22:25:27.291391', 'step': 10839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:27.326156', 'step': 10839, 'epoch': 2} {'type': 'loss', 'content': 0.15297777950763702, 'timestamp': '2025-09-30 22:25:27.351661', 'step': 10840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:27.386198', 'step': 10840, 'epoch': 2} {'type': 'loss', 'content': 0.11460244655609131, 'timestamp': '2025-09-30 22:25:27.388999', 'step': 10841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:27.422322', 'step': 10841, 'epoch': 2} {'type': 'loss', 'content': 0.1781456172466278, 'timestamp': '2025-09-30 22:25:27.431165', 'step': 10842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.472955', 'step': 10842, 'epoch': 2} {'type': 'loss', 'content': 0.1543342024087906, 'timestamp': '2025-09-30 22:25:27.476556', 'step': 10843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.519522', 'step': 10843, 'epoch': 2} {'type': 'loss', 'content': 0.06318730115890503, 'timestamp': '2025-09-30 22:25:27.550083', 'step': 10844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.587684', 'step': 10844, 'epoch': 2} {'type': 'loss', 'content': 0.1233450174331665, 'timestamp': '2025-09-30 22:25:27.591959', 'step': 10845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:27.623831', 'step': 10845, 'epoch': 2} {'type': 'loss', 'content': 0.13178957998752594, 'timestamp': '2025-09-30 22:25:27.632847', 'step': 10846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:27.671266', 'step': 10846, 'epoch': 2} {'type': 'loss', 'content': 0.08558323234319687, 'timestamp': '2025-09-30 22:25:27.685492', 'step': 10847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.717180', 'step': 10847, 'epoch': 2} {'type': 'loss', 'content': 0.06184999644756317, 'timestamp': '2025-09-30 22:25:27.745719', 'step': 10848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.779473', 'step': 10848, 'epoch': 2} {'type': 'loss', 'content': 0.14860910177230835, 'timestamp': '2025-09-30 22:25:27.790889', 'step': 10849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.822499', 'step': 10849, 'epoch': 2} {'type': 'loss', 'content': 0.17118166387081146, 'timestamp': '2025-09-30 22:25:27.826579', 'step': 10850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:27.858325', 'step': 10850, 'epoch': 2} {'type': 'loss', 'content': 0.15425893664360046, 'timestamp': '2025-09-30 22:25:27.862664', 'step': 10851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:27.893816', 'step': 10851, 'epoch': 2} {'type': 'loss', 'content': 0.11915027350187302, 'timestamp': '2025-09-30 22:25:27.920206', 'step': 10852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:27.964353', 'step': 10852, 'epoch': 2} {'type': 'loss', 'content': 0.0851791650056839, 'timestamp': '2025-09-30 22:25:27.971123', 'step': 10853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:28.005779', 'step': 10853, 'epoch': 2} {'type': 'loss', 'content': 0.08774136751890182, 'timestamp': '2025-09-30 22:25:28.009682', 'step': 10854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.045394', 'step': 10854, 'epoch': 2} {'type': 'loss', 'content': 0.04848824813961983, 'timestamp': '2025-09-30 22:25:28.057454', 'step': 10855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.090662', 'step': 10855, 'epoch': 2} {'type': 'loss', 'content': 0.12669673562049866, 'timestamp': '2025-09-30 22:25:28.127267', 'step': 10856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:28.161752', 'step': 10856, 'epoch': 2} {'type': 'loss', 'content': 0.13658592104911804, 'timestamp': '2025-09-30 22:25:28.167014', 'step': 10857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.200633', 'step': 10857, 'epoch': 2} {'type': 'loss', 'content': 0.129043310880661, 'timestamp': '2025-09-30 22:25:28.204587', 'step': 10858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.243019', 'step': 10858, 'epoch': 2} {'type': 'loss', 'content': 0.14977622032165527, 'timestamp': '2025-09-30 22:25:28.247412', 'step': 10859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.289171', 'step': 10859, 'epoch': 2} {'type': 'loss', 'content': 0.0469222255051136, 'timestamp': '2025-09-30 22:25:28.315900', 'step': 10860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.362593', 'step': 10860, 'epoch': 2} {'type': 'loss', 'content': 0.12545306980609894, 'timestamp': '2025-09-30 22:25:28.380552', 'step': 10861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:28.412047', 'step': 10861, 'epoch': 2} {'type': 'loss', 'content': 0.1260877400636673, 'timestamp': '2025-09-30 22:25:28.420277', 'step': 10862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:28.453683', 'step': 10862, 'epoch': 2} {'type': 'loss', 'content': 0.16426542401313782, 'timestamp': '2025-09-30 22:25:28.463415', 'step': 10863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:28.497147', 'step': 10863, 'epoch': 2} {'type': 'loss', 'content': 0.12610994279384613, 'timestamp': '2025-09-30 22:25:28.523672', 'step': 10864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.555396', 'step': 10864, 'epoch': 2} {'type': 'loss', 'content': 0.1125352755188942, 'timestamp': '2025-09-30 22:25:28.562744', 'step': 10865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:28.599082', 'step': 10865, 'epoch': 2} {'type': 'loss', 'content': 0.08262645453214645, 'timestamp': '2025-09-30 22:25:28.602708', 'step': 10866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:28.637565', 'step': 10866, 'epoch': 2} {'type': 'loss', 'content': 0.1335095763206482, 'timestamp': '2025-09-30 22:25:28.655009', 'step': 10867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:28.688269', 'step': 10867, 'epoch': 2} {'type': 'loss', 'content': 0.13816265761852264, 'timestamp': '2025-09-30 22:25:28.726480', 'step': 10868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:28.769395', 'step': 10868, 'epoch': 2} {'type': 'loss', 'content': 0.08184964954853058, 'timestamp': '2025-09-30 22:25:28.776966', 'step': 10869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:28.809330', 'step': 10869, 'epoch': 2} {'type': 'loss', 'content': 0.053604383021593094, 'timestamp': '2025-09-30 22:25:28.813601', 'step': 10870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:28.846630', 'step': 10870, 'epoch': 2} {'type': 'loss', 'content': 0.04341106489300728, 'timestamp': '2025-09-30 22:25:28.850555', 'step': 10871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:28.884462', 'step': 10871, 'epoch': 2} {'type': 'loss', 'content': 0.14657007157802582, 'timestamp': '2025-09-30 22:25:28.911220', 'step': 10872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:28.952638', 'step': 10872, 'epoch': 2} {'type': 'loss', 'content': 0.14783936738967896, 'timestamp': '2025-09-30 22:25:28.957219', 'step': 10873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:28.989587', 'step': 10873, 'epoch': 2} {'type': 'loss', 'content': 0.05981738120317459, 'timestamp': '2025-09-30 22:25:28.996923', 'step': 10874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:29.032163', 'step': 10874, 'epoch': 2} {'type': 'loss', 'content': 0.12057981640100479, 'timestamp': '2025-09-30 22:25:29.036855', 'step': 10875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.069269', 'step': 10875, 'epoch': 2} {'type': 'loss', 'content': 0.11769281327724457, 'timestamp': '2025-09-30 22:25:29.099027', 'step': 10876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.131717', 'step': 10876, 'epoch': 2} {'type': 'loss', 'content': 0.04102050140500069, 'timestamp': '2025-09-30 22:25:29.136444', 'step': 10877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:29.167565', 'step': 10877, 'epoch': 2} {'type': 'loss', 'content': 0.08539258688688278, 'timestamp': '2025-09-30 22:25:29.173556', 'step': 10878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:29.204765', 'step': 10878, 'epoch': 2} {'type': 'loss', 'content': 0.11045228689908981, 'timestamp': '2025-09-30 22:25:29.211610', 'step': 10879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:29.245638', 'step': 10879, 'epoch': 2} {'type': 'loss', 'content': 0.047862228006124496, 'timestamp': '2025-09-30 22:25:29.271235', 'step': 10880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.303193', 'step': 10880, 'epoch': 2} {'type': 'loss', 'content': 0.12479599565267563, 'timestamp': '2025-09-30 22:25:29.307469', 'step': 10881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.339646', 'step': 10881, 'epoch': 2} {'type': 'loss', 'content': 0.13047188520431519, 'timestamp': '2025-09-30 22:25:29.343799', 'step': 10882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:29.374611', 'step': 10882, 'epoch': 2} {'type': 'loss', 'content': 0.1343199461698532, 'timestamp': '2025-09-30 22:25:29.378484', 'step': 10883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:29.410540', 'step': 10883, 'epoch': 2} {'type': 'loss', 'content': 0.09541743993759155, 'timestamp': '2025-09-30 22:25:29.436680', 'step': 10884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.466833', 'step': 10884, 'epoch': 2} {'type': 'loss', 'content': 0.049779538065195084, 'timestamp': '2025-09-30 22:25:29.479671', 'step': 10885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.520919', 'step': 10885, 'epoch': 2} {'type': 'loss', 'content': 0.05924615263938904, 'timestamp': '2025-09-30 22:25:29.526420', 'step': 10886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:29.559539', 'step': 10886, 'epoch': 2} {'type': 'loss', 'content': 0.22670619189739227, 'timestamp': '2025-09-30 22:25:29.564731', 'step': 10887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:29.598601', 'step': 10887, 'epoch': 2} {'type': 'loss', 'content': 0.19335532188415527, 'timestamp': '2025-09-30 22:25:29.624189', 'step': 10888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:29.658522', 'step': 10888, 'epoch': 2} {'type': 'loss', 'content': 0.07576434314250946, 'timestamp': '2025-09-30 22:25:29.670238', 'step': 10889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:29.709164', 'step': 10889, 'epoch': 2} {'type': 'loss', 'content': 0.15430478751659393, 'timestamp': '2025-09-30 22:25:29.713512', 'step': 10890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.744275', 'step': 10890, 'epoch': 2} {'type': 'loss', 'content': 0.10533196479082108, 'timestamp': '2025-09-30 22:25:29.747391', 'step': 10891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.784331', 'step': 10891, 'epoch': 2} {'type': 'loss', 'content': 0.10197822004556656, 'timestamp': '2025-09-30 22:25:29.809565', 'step': 10892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.853525', 'step': 10892, 'epoch': 2} {'type': 'loss', 'content': 0.04268498346209526, 'timestamp': '2025-09-30 22:25:29.864364', 'step': 10893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.899459', 'step': 10893, 'epoch': 2} {'type': 'loss', 'content': 0.15684248507022858, 'timestamp': '2025-09-30 22:25:29.902962', 'step': 10894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.933587', 'step': 10894, 'epoch': 2} {'type': 'loss', 'content': 0.08654161542654037, 'timestamp': '2025-09-30 22:25:29.937760', 'step': 10895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:29.969269', 'step': 10895, 'epoch': 2} {'type': 'loss', 'content': 0.1865146905183792, 'timestamp': '2025-09-30 22:25:29.994984', 'step': 10896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:30.027468', 'step': 10896, 'epoch': 2} {'type': 'loss', 'content': 0.0815354734659195, 'timestamp': '2025-09-30 22:25:30.031579', 'step': 10897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.066653', 'step': 10897, 'epoch': 2} {'type': 'loss', 'content': 0.19703732430934906, 'timestamp': '2025-09-30 22:25:30.070137', 'step': 10898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:30.100956', 'step': 10898, 'epoch': 2} {'type': 'loss', 'content': 0.08112958818674088, 'timestamp': '2025-09-30 22:25:30.104715', 'step': 10899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:30.142774', 'step': 10899, 'epoch': 2} {'type': 'loss', 'content': 0.10941918939352036, 'timestamp': '2025-09-30 22:25:30.167539', 'step': 10900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:30.200276', 'step': 10900, 'epoch': 2} {'type': 'loss', 'content': 0.09395736455917358, 'timestamp': '2025-09-30 22:25:30.205082', 'step': 10901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:30.237648', 'step': 10901, 'epoch': 2} {'type': 'loss', 'content': 0.15719711780548096, 'timestamp': '2025-09-30 22:25:30.240655', 'step': 10902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.272654', 'step': 10902, 'epoch': 2} {'type': 'loss', 'content': 0.13555936515331268, 'timestamp': '2025-09-30 22:25:30.276237', 'step': 10903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:30.308165', 'step': 10903, 'epoch': 2} {'type': 'loss', 'content': 0.07648535817861557, 'timestamp': '2025-09-30 22:25:30.337947', 'step': 10904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:30.370624', 'step': 10904, 'epoch': 2} {'type': 'loss', 'content': 0.12934203445911407, 'timestamp': '2025-09-30 22:25:30.386517', 'step': 10905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:30.420827', 'step': 10905, 'epoch': 2} {'type': 'loss', 'content': 0.0989847183227539, 'timestamp': '2025-09-30 22:25:30.425121', 'step': 10906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.468024', 'step': 10906, 'epoch': 2} {'type': 'loss', 'content': 0.07692491263151169, 'timestamp': '2025-09-30 22:25:30.472475', 'step': 10907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.505198', 'step': 10907, 'epoch': 2} {'type': 'loss', 'content': 0.1504283845424652, 'timestamp': '2025-09-30 22:25:30.530793', 'step': 10908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:30.562281', 'step': 10908, 'epoch': 2} {'type': 'loss', 'content': 0.12844213843345642, 'timestamp': '2025-09-30 22:25:30.567570', 'step': 10909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:30.601825', 'step': 10909, 'epoch': 2} {'type': 'loss', 'content': 0.1424543261528015, 'timestamp': '2025-09-30 22:25:30.607174', 'step': 10910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:30.639196', 'step': 10910, 'epoch': 2} {'type': 'loss', 'content': 0.1744072586297989, 'timestamp': '2025-09-30 22:25:30.642776', 'step': 10911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:30.683879', 'step': 10911, 'epoch': 2} {'type': 'loss', 'content': 0.14950713515281677, 'timestamp': '2025-09-30 22:25:30.710427', 'step': 10912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:30.750167', 'step': 10912, 'epoch': 2} {'type': 'loss', 'content': 0.10285113751888275, 'timestamp': '2025-09-30 22:25:30.757574', 'step': 10913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:30.788485', 'step': 10913, 'epoch': 2} {'type': 'loss', 'content': 0.1178121417760849, 'timestamp': '2025-09-30 22:25:30.793466', 'step': 10914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.825352', 'step': 10914, 'epoch': 2} {'type': 'loss', 'content': 0.08571362495422363, 'timestamp': '2025-09-30 22:25:30.830367', 'step': 10915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:30.867806', 'step': 10915, 'epoch': 2} {'type': 'loss', 'content': 0.15096943080425262, 'timestamp': '2025-09-30 22:25:30.893371', 'step': 10916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.929865', 'step': 10916, 'epoch': 2} {'type': 'loss', 'content': 0.03399030491709709, 'timestamp': '2025-09-30 22:25:30.936238', 'step': 10917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:30.967721', 'step': 10917, 'epoch': 2} {'type': 'loss', 'content': 0.07230871170759201, 'timestamp': '2025-09-30 22:25:30.971516', 'step': 10918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.010589', 'step': 10918, 'epoch': 2} {'type': 'loss', 'content': 0.12528592348098755, 'timestamp': '2025-09-30 22:25:31.013791', 'step': 10919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:31.044319', 'step': 10919, 'epoch': 2} {'type': 'loss', 'content': 0.12426678836345673, 'timestamp': '2025-09-30 22:25:31.068604', 'step': 10920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.099425', 'step': 10920, 'epoch': 2} {'type': 'loss', 'content': 0.0916546881198883, 'timestamp': '2025-09-30 22:25:31.101654', 'step': 10921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.132151', 'step': 10921, 'epoch': 2} {'type': 'loss', 'content': 0.039497002959251404, 'timestamp': '2025-09-30 22:25:31.135228', 'step': 10922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.168447', 'step': 10922, 'epoch': 2} {'type': 'loss', 'content': 0.14303940534591675, 'timestamp': '2025-09-30 22:25:31.173001', 'step': 10923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.205303', 'step': 10923, 'epoch': 2} {'type': 'loss', 'content': 0.0947437733411789, 'timestamp': '2025-09-30 22:25:31.229240', 'step': 10924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.260746', 'step': 10924, 'epoch': 2} {'type': 'loss', 'content': 0.15543973445892334, 'timestamp': '2025-09-30 22:25:31.267128', 'step': 10925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.299287', 'step': 10925, 'epoch': 2} {'type': 'loss', 'content': 0.09359671175479889, 'timestamp': '2025-09-30 22:25:31.311398', 'step': 10926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.350594', 'step': 10926, 'epoch': 2} {'type': 'loss', 'content': 0.14697320759296417, 'timestamp': '2025-09-30 22:25:31.362944', 'step': 10927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.394347', 'step': 10927, 'epoch': 2} {'type': 'loss', 'content': 0.06565328687429428, 'timestamp': '2025-09-30 22:25:31.418700', 'step': 10928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.452473', 'step': 10928, 'epoch': 2} {'type': 'loss', 'content': 0.10569065809249878, 'timestamp': '2025-09-30 22:25:31.455853', 'step': 10929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.490007', 'step': 10929, 'epoch': 2} {'type': 'loss', 'content': 0.14773142337799072, 'timestamp': '2025-09-30 22:25:31.492912', 'step': 10930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:31.529049', 'step': 10930, 'epoch': 2} {'type': 'loss', 'content': 0.16071999073028564, 'timestamp': '2025-09-30 22:25:31.531924', 'step': 10931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.563138', 'step': 10931, 'epoch': 2} {'type': 'loss', 'content': 0.08994850516319275, 'timestamp': '2025-09-30 22:25:31.589401', 'step': 10932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:31.622309', 'step': 10932, 'epoch': 2} {'type': 'loss', 'content': 0.06446840614080429, 'timestamp': '2025-09-30 22:25:31.628894', 'step': 10933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:31.660319', 'step': 10933, 'epoch': 2} {'type': 'loss', 'content': 0.1215112954378128, 'timestamp': '2025-09-30 22:25:31.664301', 'step': 10934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:31.696360', 'step': 10934, 'epoch': 2} {'type': 'loss', 'content': 0.1323082000017166, 'timestamp': '2025-09-30 22:25:31.700601', 'step': 10935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.732072', 'step': 10935, 'epoch': 2} {'type': 'loss', 'content': 0.20124167203903198, 'timestamp': '2025-09-30 22:25:31.758797', 'step': 10936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.791780', 'step': 10936, 'epoch': 2} {'type': 'loss', 'content': 0.09963402897119522, 'timestamp': '2025-09-30 22:25:31.795028', 'step': 10937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:31.825630', 'step': 10937, 'epoch': 2} {'type': 'loss', 'content': 0.09767527133226395, 'timestamp': '2025-09-30 22:25:31.828584', 'step': 10938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.867052', 'step': 10938, 'epoch': 2} {'type': 'loss', 'content': 0.10223900526762009, 'timestamp': '2025-09-30 22:25:31.878464', 'step': 10939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:31.917947', 'step': 10939, 'epoch': 2} {'type': 'loss', 'content': 0.1540132313966751, 'timestamp': '2025-09-30 22:25:31.948856', 'step': 10940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:31.978960', 'step': 10940, 'epoch': 2} {'type': 'loss', 'content': 0.12800638377666473, 'timestamp': '2025-09-30 22:25:31.984977', 'step': 10941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:32.016262', 'step': 10941, 'epoch': 2} {'type': 'loss', 'content': 0.1801256686449051, 'timestamp': '2025-09-30 22:25:32.020545', 'step': 10942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:32.062276', 'step': 10942, 'epoch': 2} {'type': 'loss', 'content': 0.1659475862979889, 'timestamp': '2025-09-30 22:25:32.067447', 'step': 10943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:32.101482', 'step': 10943, 'epoch': 2} {'type': 'loss', 'content': 0.12203900516033173, 'timestamp': '2025-09-30 22:25:32.126876', 'step': 10944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.161411', 'step': 10944, 'epoch': 2} {'type': 'loss', 'content': 0.09539992362260818, 'timestamp': '2025-09-30 22:25:32.166991', 'step': 10945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.197403', 'step': 10945, 'epoch': 2} {'type': 'loss', 'content': 0.07354484498500824, 'timestamp': '2025-09-30 22:25:32.206382', 'step': 10946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:32.238442', 'step': 10946, 'epoch': 2} {'type': 'loss', 'content': 0.09132518619298935, 'timestamp': '2025-09-30 22:25:32.243755', 'step': 10947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:32.280068', 'step': 10947, 'epoch': 2} {'type': 'loss', 'content': 0.08204204589128494, 'timestamp': '2025-09-30 22:25:32.310304', 'step': 10948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.344692', 'step': 10948, 'epoch': 2} {'type': 'loss', 'content': 0.12131273746490479, 'timestamp': '2025-09-30 22:25:32.347746', 'step': 10949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:32.378089', 'step': 10949, 'epoch': 2} {'type': 'loss', 'content': 0.0587640218436718, 'timestamp': '2025-09-30 22:25:32.380687', 'step': 10950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:32.420071', 'step': 10950, 'epoch': 2} {'type': 'loss', 'content': 0.08268582820892334, 'timestamp': '2025-09-30 22:25:32.423113', 'step': 10951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.453818', 'step': 10951, 'epoch': 2} {'type': 'loss', 'content': 0.10867954045534134, 'timestamp': '2025-09-30 22:25:32.481080', 'step': 10952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.517560', 'step': 10952, 'epoch': 2} {'type': 'loss', 'content': 0.04951968416571617, 'timestamp': '2025-09-30 22:25:32.528032', 'step': 10953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.563685', 'step': 10953, 'epoch': 2} {'type': 'loss', 'content': 0.13717450201511383, 'timestamp': '2025-09-30 22:25:32.566590', 'step': 10954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:32.603516', 'step': 10954, 'epoch': 2} {'type': 'loss', 'content': 0.1096871942281723, 'timestamp': '2025-09-30 22:25:32.608709', 'step': 10955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:32.641464', 'step': 10955, 'epoch': 2} {'type': 'loss', 'content': 0.26149874925613403, 'timestamp': '2025-09-30 22:25:32.667857', 'step': 10956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:32.697541', 'step': 10956, 'epoch': 2} {'type': 'loss', 'content': 0.06103956326842308, 'timestamp': '2025-09-30 22:25:32.700429', 'step': 10957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.730955', 'step': 10957, 'epoch': 2} {'type': 'loss', 'content': 0.13291652500629425, 'timestamp': '2025-09-30 22:25:32.733997', 'step': 10958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.765224', 'step': 10958, 'epoch': 2} {'type': 'loss', 'content': 0.12091000378131866, 'timestamp': '2025-09-30 22:25:32.770354', 'step': 10959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:32.802322', 'step': 10959, 'epoch': 2} {'type': 'loss', 'content': 0.0896705910563469, 'timestamp': '2025-09-30 22:25:32.828290', 'step': 10960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:32.858410', 'step': 10960, 'epoch': 2} {'type': 'loss', 'content': 0.16982488334178925, 'timestamp': '2025-09-30 22:25:32.874280', 'step': 10961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:32.915198', 'step': 10961, 'epoch': 2} {'type': 'loss', 'content': 0.09739811718463898, 'timestamp': '2025-09-30 22:25:32.924114', 'step': 10962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:32.959316', 'step': 10962, 'epoch': 2} {'type': 'loss', 'content': 0.07075591385364532, 'timestamp': '2025-09-30 22:25:32.966996', 'step': 10963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:33.005475', 'step': 10963, 'epoch': 2} {'type': 'loss', 'content': 0.12927544116973877, 'timestamp': '2025-09-30 22:25:33.030893', 'step': 10964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:33.076982', 'step': 10964, 'epoch': 2} {'type': 'loss', 'content': 0.08878687769174576, 'timestamp': '2025-09-30 22:25:33.090975', 'step': 10965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:33.121824', 'step': 10965, 'epoch': 2} {'type': 'loss', 'content': 0.09141138941049576, 'timestamp': '2025-09-30 22:25:33.127051', 'step': 10966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.158663', 'step': 10966, 'epoch': 2} {'type': 'loss', 'content': 0.09261292219161987, 'timestamp': '2025-09-30 22:25:33.171217', 'step': 10967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:33.205356', 'step': 10967, 'epoch': 2} {'type': 'loss', 'content': 0.09976891428232193, 'timestamp': '2025-09-30 22:25:33.231749', 'step': 10968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:33.283457', 'step': 10968, 'epoch': 2} {'type': 'loss', 'content': 0.08335528522729874, 'timestamp': '2025-09-30 22:25:33.286581', 'step': 10969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.319961', 'step': 10969, 'epoch': 2} {'type': 'loss', 'content': 0.15158067643642426, 'timestamp': '2025-09-30 22:25:33.324056', 'step': 10970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.362269', 'step': 10970, 'epoch': 2} {'type': 'loss', 'content': 0.14463812112808228, 'timestamp': '2025-09-30 22:25:33.365420', 'step': 10971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:33.396253', 'step': 10971, 'epoch': 2} {'type': 'loss', 'content': 0.13916602730751038, 'timestamp': '2025-09-30 22:25:33.420731', 'step': 10972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:33.468963', 'step': 10972, 'epoch': 2} {'type': 'loss', 'content': 0.03953506797552109, 'timestamp': '2025-09-30 22:25:33.473321', 'step': 10973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:33.510981', 'step': 10973, 'epoch': 2} {'type': 'loss', 'content': 0.13505461812019348, 'timestamp': '2025-09-30 22:25:33.520056', 'step': 10974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.552330', 'step': 10974, 'epoch': 2} {'type': 'loss', 'content': 0.16986055672168732, 'timestamp': '2025-09-30 22:25:33.562247', 'step': 10975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.594606', 'step': 10975, 'epoch': 2} {'type': 'loss', 'content': 0.09926697611808777, 'timestamp': '2025-09-30 22:25:33.624462', 'step': 10976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.657392', 'step': 10976, 'epoch': 2} {'type': 'loss', 'content': 0.14743612706661224, 'timestamp': '2025-09-30 22:25:33.665174', 'step': 10977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:33.702321', 'step': 10977, 'epoch': 2} {'type': 'loss', 'content': 0.11339281499385834, 'timestamp': '2025-09-30 22:25:33.710623', 'step': 10978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:33.743658', 'step': 10978, 'epoch': 2} {'type': 'loss', 'content': 0.10447022318840027, 'timestamp': '2025-09-30 22:25:33.757845', 'step': 10979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:33.789744', 'step': 10979, 'epoch': 2} {'type': 'loss', 'content': 0.15132851898670197, 'timestamp': '2025-09-30 22:25:33.816065', 'step': 10980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:33.852480', 'step': 10980, 'epoch': 2} {'type': 'loss', 'content': 0.08536188304424286, 'timestamp': '2025-09-30 22:25:33.856444', 'step': 10981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:33.889177', 'step': 10981, 'epoch': 2} {'type': 'loss', 'content': 0.0953541025519371, 'timestamp': '2025-09-30 22:25:33.892297', 'step': 10982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:33.924195', 'step': 10982, 'epoch': 2} {'type': 'loss', 'content': 0.15463531017303467, 'timestamp': '2025-09-30 22:25:33.927309', 'step': 10983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:33.967392', 'step': 10983, 'epoch': 2} {'type': 'loss', 'content': 0.07625322043895721, 'timestamp': '2025-09-30 22:25:33.991828', 'step': 10984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:34.022397', 'step': 10984, 'epoch': 2} {'type': 'loss', 'content': 0.0825783908367157, 'timestamp': '2025-09-30 22:25:34.036392', 'step': 10985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:34.072705', 'step': 10985, 'epoch': 2} {'type': 'loss', 'content': 0.08976850658655167, 'timestamp': '2025-09-30 22:25:34.077112', 'step': 10986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:34.108194', 'step': 10986, 'epoch': 2} {'type': 'loss', 'content': 0.19709347188472748, 'timestamp': '2025-09-30 22:25:34.110777', 'step': 10987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:34.145708', 'step': 10987, 'epoch': 2} {'type': 'loss', 'content': 0.07835641503334045, 'timestamp': '2025-09-30 22:25:34.171193', 'step': 10988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:34.201929', 'step': 10988, 'epoch': 2} {'type': 'loss', 'content': 0.13428188860416412, 'timestamp': '2025-09-30 22:25:34.205940', 'step': 10989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:34.243047', 'step': 10989, 'epoch': 2} {'type': 'loss', 'content': 0.09626100957393646, 'timestamp': '2025-09-30 22:25:34.261070', 'step': 10990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:34.297872', 'step': 10990, 'epoch': 2} {'type': 'loss', 'content': 0.09902655333280563, 'timestamp': '2025-09-30 22:25:34.304433', 'step': 10991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:34.337323', 'step': 10991, 'epoch': 2} {'type': 'loss', 'content': 0.07079891115427017, 'timestamp': '2025-09-30 22:25:34.371450', 'step': 10992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:34.404880', 'step': 10992, 'epoch': 2} {'type': 'loss', 'content': 0.14959827065467834, 'timestamp': '2025-09-30 22:25:34.415227', 'step': 10993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:34.448571', 'step': 10993, 'epoch': 2} {'type': 'loss', 'content': 0.1258593201637268, 'timestamp': '2025-09-30 22:25:34.458636', 'step': 10994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:34.493909', 'step': 10994, 'epoch': 2} {'type': 'loss', 'content': 0.061869699507951736, 'timestamp': '2025-09-30 22:25:34.497335', 'step': 10995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:34.528867', 'step': 10995, 'epoch': 2} {'type': 'loss', 'content': 0.13082000613212585, 'timestamp': '2025-09-30 22:25:34.554016', 'step': 10996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:34.584791', 'step': 10996, 'epoch': 2} {'type': 'loss', 'content': 0.12478708475828171, 'timestamp': '2025-09-30 22:25:34.587626', 'step': 10997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:34.625821', 'step': 10997, 'epoch': 2} {'type': 'loss', 'content': 0.0631708949804306, 'timestamp': '2025-09-30 22:25:34.628438', 'step': 10998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:34.661984', 'step': 10998, 'epoch': 2} {'type': 'loss', 'content': 0.18826013803482056, 'timestamp': '2025-09-30 22:25:34.680630', 'step': 10999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:34.719386', 'step': 10999, 'epoch': 2} {'type': 'loss', 'content': 0.15616261959075928, 'timestamp': '2025-09-30 22:25:34.743573', 'step': 11000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11000', 'timestamp': '2025-09-30 22:25:39.622519', 'step': 11000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:39.655177', 'step': 11000, 'epoch': 2} {'type': 'loss', 'content': 0.1356828212738037, 'timestamp': '2025-09-30 22:25:39.659032', 'step': 11001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:39.690106', 'step': 11001, 'epoch': 2} {'type': 'loss', 'content': 0.15345628559589386, 'timestamp': '2025-09-30 22:25:39.692880', 'step': 11002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:39.724282', 'step': 11002, 'epoch': 2} {'type': 'loss', 'content': 0.1246604323387146, 'timestamp': '2025-09-30 22:25:39.732322', 'step': 11003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:39.768146', 'step': 11003, 'epoch': 2} {'type': 'loss', 'content': 0.07409174740314484, 'timestamp': '2025-09-30 22:25:39.793200', 'step': 11004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:39.825648', 'step': 11004, 'epoch': 2} {'type': 'loss', 'content': 0.10448873788118362, 'timestamp': '2025-09-30 22:25:39.829011', 'step': 11005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:39.870733', 'step': 11005, 'epoch': 2} {'type': 'loss', 'content': 0.1957680881023407, 'timestamp': '2025-09-30 22:25:39.881247', 'step': 11006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:39.917617', 'step': 11006, 'epoch': 2} {'type': 'loss', 'content': 0.13313020765781403, 'timestamp': '2025-09-30 22:25:39.922201', 'step': 11007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:39.961965', 'step': 11007, 'epoch': 2} {'type': 'loss', 'content': 0.07587528228759766, 'timestamp': '2025-09-30 22:25:39.986286', 'step': 11008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:40.017407', 'step': 11008, 'epoch': 2} {'type': 'loss', 'content': 0.08641453087329865, 'timestamp': '2025-09-30 22:25:40.026993', 'step': 11009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:40.063853', 'step': 11009, 'epoch': 2} {'type': 'loss', 'content': 0.13441234827041626, 'timestamp': '2025-09-30 22:25:40.068114', 'step': 11010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.106930', 'step': 11010, 'epoch': 2} {'type': 'loss', 'content': 0.08333475142717361, 'timestamp': '2025-09-30 22:25:40.114899', 'step': 11011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:40.151798', 'step': 11011, 'epoch': 2} {'type': 'loss', 'content': 0.061437491327524185, 'timestamp': '2025-09-30 22:25:40.176188', 'step': 11012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:40.206991', 'step': 11012, 'epoch': 2} {'type': 'loss', 'content': 0.061994001269340515, 'timestamp': '2025-09-30 22:25:40.216011', 'step': 11013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:40.252376', 'step': 11013, 'epoch': 2} {'type': 'loss', 'content': 0.06041654944419861, 'timestamp': '2025-09-30 22:25:40.256535', 'step': 11014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.288345', 'step': 11014, 'epoch': 2} {'type': 'loss', 'content': 0.0813734382390976, 'timestamp': '2025-09-30 22:25:40.292054', 'step': 11015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.322883', 'step': 11015, 'epoch': 2} {'type': 'loss', 'content': 0.08973496407270432, 'timestamp': '2025-09-30 22:25:40.348472', 'step': 11016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.379486', 'step': 11016, 'epoch': 2} {'type': 'loss', 'content': 0.10515668243169785, 'timestamp': '2025-09-30 22:25:40.386561', 'step': 11017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:40.419365', 'step': 11017, 'epoch': 2} {'type': 'loss', 'content': 0.11952564120292664, 'timestamp': '2025-09-30 22:25:40.428782', 'step': 11018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:40.463884', 'step': 11018, 'epoch': 2} {'type': 'loss', 'content': 0.17372138798236847, 'timestamp': '2025-09-30 22:25:40.467489', 'step': 11019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:40.508548', 'step': 11019, 'epoch': 2} {'type': 'loss', 'content': 0.02860431931912899, 'timestamp': '2025-09-30 22:25:40.539212', 'step': 11020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:40.573373', 'step': 11020, 'epoch': 2} {'type': 'loss', 'content': 0.1407756507396698, 'timestamp': '2025-09-30 22:25:40.576526', 'step': 11021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:40.608299', 'step': 11021, 'epoch': 2} {'type': 'loss', 'content': 0.18135912716388702, 'timestamp': '2025-09-30 22:25:40.614380', 'step': 11022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:40.650339', 'step': 11022, 'epoch': 2} {'type': 'loss', 'content': 0.08880233019590378, 'timestamp': '2025-09-30 22:25:40.655513', 'step': 11023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:40.687005', 'step': 11023, 'epoch': 2} {'type': 'loss', 'content': 0.09948690235614777, 'timestamp': '2025-09-30 22:25:40.711195', 'step': 11024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.742430', 'step': 11024, 'epoch': 2} {'type': 'loss', 'content': 0.08188273012638092, 'timestamp': '2025-09-30 22:25:40.748911', 'step': 11025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:40.783715', 'step': 11025, 'epoch': 2} {'type': 'loss', 'content': 0.1619589924812317, 'timestamp': '2025-09-30 22:25:40.786223', 'step': 11026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.817268', 'step': 11026, 'epoch': 2} {'type': 'loss', 'content': 0.03913421183824539, 'timestamp': '2025-09-30 22:25:40.820047', 'step': 11027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.851314', 'step': 11027, 'epoch': 2} {'type': 'loss', 'content': 0.06663226336240768, 'timestamp': '2025-09-30 22:25:40.876233', 'step': 11028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:40.907138', 'step': 11028, 'epoch': 2} {'type': 'loss', 'content': 0.10781320184469223, 'timestamp': '2025-09-30 22:25:40.910547', 'step': 11029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:40.941572', 'step': 11029, 'epoch': 2} {'type': 'loss', 'content': 0.1569831520318985, 'timestamp': '2025-09-30 22:25:40.946223', 'step': 11030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:40.984995', 'step': 11030, 'epoch': 2} {'type': 'loss', 'content': 0.10366853326559067, 'timestamp': '2025-09-30 22:25:40.988334', 'step': 11031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:41.019769', 'step': 11031, 'epoch': 2} {'type': 'loss', 'content': 0.14734342694282532, 'timestamp': '2025-09-30 22:25:41.044429', 'step': 11032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:41.077435', 'step': 11032, 'epoch': 2} {'type': 'loss', 'content': 0.14118611812591553, 'timestamp': '2025-09-30 22:25:41.081496', 'step': 11033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.113160', 'step': 11033, 'epoch': 2} {'type': 'loss', 'content': 0.17040877044200897, 'timestamp': '2025-09-30 22:25:41.116568', 'step': 11034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:41.148517', 'step': 11034, 'epoch': 2} {'type': 'loss', 'content': 0.10351316630840302, 'timestamp': '2025-09-30 22:25:41.150912', 'step': 11035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.182026', 'step': 11035, 'epoch': 2} {'type': 'loss', 'content': 0.119851253926754, 'timestamp': '2025-09-30 22:25:41.206560', 'step': 11036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:41.244489', 'step': 11036, 'epoch': 2} {'type': 'loss', 'content': 0.15725496411323547, 'timestamp': '2025-09-30 22:25:41.247212', 'step': 11037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:41.279832', 'step': 11037, 'epoch': 2} {'type': 'loss', 'content': 0.1684221625328064, 'timestamp': '2025-09-30 22:25:41.282393', 'step': 11038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:41.317431', 'step': 11038, 'epoch': 2} {'type': 'loss', 'content': 0.06354957073926926, 'timestamp': '2025-09-30 22:25:41.320281', 'step': 11039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.356320', 'step': 11039, 'epoch': 2} {'type': 'loss', 'content': 0.07722886651754379, 'timestamp': '2025-09-30 22:25:41.380493', 'step': 11040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.414211', 'step': 11040, 'epoch': 2} {'type': 'loss', 'content': 0.11869450658559799, 'timestamp': '2025-09-30 22:25:41.422252', 'step': 11041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:41.461762', 'step': 11041, 'epoch': 2} {'type': 'loss', 'content': 0.07560163736343384, 'timestamp': '2025-09-30 22:25:41.468469', 'step': 11042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.504612', 'step': 11042, 'epoch': 2} {'type': 'loss', 'content': 0.0610024631023407, 'timestamp': '2025-09-30 22:25:41.507573', 'step': 11043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:41.538904', 'step': 11043, 'epoch': 2} {'type': 'loss', 'content': 0.13100984692573547, 'timestamp': '2025-09-30 22:25:41.563471', 'step': 11044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.593785', 'step': 11044, 'epoch': 2} {'type': 'loss', 'content': 0.15790720283985138, 'timestamp': '2025-09-30 22:25:41.596390', 'step': 11045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.627955', 'step': 11045, 'epoch': 2} {'type': 'loss', 'content': 0.21495145559310913, 'timestamp': '2025-09-30 22:25:41.631105', 'step': 11046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.662065', 'step': 11046, 'epoch': 2} {'type': 'loss', 'content': 0.04964502155780792, 'timestamp': '2025-09-30 22:25:41.677213', 'step': 11047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.710077', 'step': 11047, 'epoch': 2} {'type': 'loss', 'content': 0.07097778469324112, 'timestamp': '2025-09-30 22:25:41.734604', 'step': 11048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.767862', 'step': 11048, 'epoch': 2} {'type': 'loss', 'content': 0.13198548555374146, 'timestamp': '2025-09-30 22:25:41.770168', 'step': 11049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.803740', 'step': 11049, 'epoch': 2} {'type': 'loss', 'content': 0.13905403017997742, 'timestamp': '2025-09-30 22:25:41.806317', 'step': 11050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.837838', 'step': 11050, 'epoch': 2} {'type': 'loss', 'content': 0.0669960081577301, 'timestamp': '2025-09-30 22:25:41.840441', 'step': 11051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:41.874352', 'step': 11051, 'epoch': 2} {'type': 'loss', 'content': 0.07703805714845657, 'timestamp': '2025-09-30 22:25:41.904052', 'step': 11052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:41.941632', 'step': 11052, 'epoch': 2} {'type': 'loss', 'content': 0.0750495195388794, 'timestamp': '2025-09-30 22:25:41.946306', 'step': 11053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:41.978215', 'step': 11053, 'epoch': 2} {'type': 'loss', 'content': 0.12949447333812714, 'timestamp': '2025-09-30 22:25:41.983902', 'step': 11054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:42.017133', 'step': 11054, 'epoch': 2} {'type': 'loss', 'content': 0.07374005019664764, 'timestamp': '2025-09-30 22:25:42.020739', 'step': 11055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:42.051854', 'step': 11055, 'epoch': 2} {'type': 'loss', 'content': 0.044094353914260864, 'timestamp': '2025-09-30 22:25:42.076736', 'step': 11056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:42.108706', 'step': 11056, 'epoch': 2} {'type': 'loss', 'content': 0.1488487869501114, 'timestamp': '2025-09-30 22:25:42.115156', 'step': 11057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:42.151725', 'step': 11057, 'epoch': 2} {'type': 'loss', 'content': 0.06726135313510895, 'timestamp': '2025-09-30 22:25:42.155681', 'step': 11058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.187583', 'step': 11058, 'epoch': 2} {'type': 'loss', 'content': 0.0768093466758728, 'timestamp': '2025-09-30 22:25:42.192040', 'step': 11059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:42.226943', 'step': 11059, 'epoch': 2} {'type': 'loss', 'content': 0.15107229351997375, 'timestamp': '2025-09-30 22:25:42.251988', 'step': 11060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.283268', 'step': 11060, 'epoch': 2} {'type': 'loss', 'content': 0.12090791761875153, 'timestamp': '2025-09-30 22:25:42.286735', 'step': 11061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.319259', 'step': 11061, 'epoch': 2} {'type': 'loss', 'content': 0.039282411336898804, 'timestamp': '2025-09-30 22:25:42.325466', 'step': 11062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:42.360154', 'step': 11062, 'epoch': 2} {'type': 'loss', 'content': 0.06047310680150986, 'timestamp': '2025-09-30 22:25:42.363637', 'step': 11063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:42.401735', 'step': 11063, 'epoch': 2} {'type': 'loss', 'content': 0.1110755130648613, 'timestamp': '2025-09-30 22:25:42.426945', 'step': 11064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.466386', 'step': 11064, 'epoch': 2} {'type': 'loss', 'content': 0.0706920251250267, 'timestamp': '2025-09-30 22:25:42.471708', 'step': 11065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:42.513291', 'step': 11065, 'epoch': 2} {'type': 'loss', 'content': 0.061916615813970566, 'timestamp': '2025-09-30 22:25:42.520038', 'step': 11066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.556102', 'step': 11066, 'epoch': 2} {'type': 'loss', 'content': 0.22776947915554047, 'timestamp': '2025-09-30 22:25:42.561599', 'step': 11067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.595025', 'step': 11067, 'epoch': 2} {'type': 'loss', 'content': 0.1546618938446045, 'timestamp': '2025-09-30 22:25:42.620906', 'step': 11068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.655933', 'step': 11068, 'epoch': 2} {'type': 'loss', 'content': 0.10980416089296341, 'timestamp': '2025-09-30 22:25:42.661370', 'step': 11069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:42.702532', 'step': 11069, 'epoch': 2} {'type': 'loss', 'content': 0.1249246597290039, 'timestamp': '2025-09-30 22:25:42.707067', 'step': 11070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.750675', 'step': 11070, 'epoch': 2} {'type': 'loss', 'content': 0.20650827884674072, 'timestamp': '2025-09-30 22:25:42.757625', 'step': 11071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:42.799053', 'step': 11071, 'epoch': 2} {'type': 'loss', 'content': 0.15085558593273163, 'timestamp': '2025-09-30 22:25:42.822813', 'step': 11072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:42.855124', 'step': 11072, 'epoch': 2} {'type': 'loss', 'content': 0.09502260386943817, 'timestamp': '2025-09-30 22:25:42.863671', 'step': 11073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:42.897081', 'step': 11073, 'epoch': 2} {'type': 'loss', 'content': 0.16662031412124634, 'timestamp': '2025-09-30 22:25:42.900388', 'step': 11074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:42.933157', 'step': 11074, 'epoch': 2} {'type': 'loss', 'content': 0.060801174491643906, 'timestamp': '2025-09-30 22:25:42.940713', 'step': 11075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:42.972733', 'step': 11075, 'epoch': 2} {'type': 'loss', 'content': 0.19636036455631256, 'timestamp': '2025-09-30 22:25:43.001238', 'step': 11076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:43.032455', 'step': 11076, 'epoch': 2} {'type': 'loss', 'content': 0.08532959222793579, 'timestamp': '2025-09-30 22:25:43.036097', 'step': 11077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.067545', 'step': 11077, 'epoch': 2} {'type': 'loss', 'content': 0.10240133851766586, 'timestamp': '2025-09-30 22:25:43.071339', 'step': 11078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.104559', 'step': 11078, 'epoch': 2} {'type': 'loss', 'content': 0.07910935580730438, 'timestamp': '2025-09-30 22:25:43.110027', 'step': 11079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.143645', 'step': 11079, 'epoch': 2} {'type': 'loss', 'content': 0.15508580207824707, 'timestamp': '2025-09-30 22:25:43.169593', 'step': 11080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:43.208042', 'step': 11080, 'epoch': 2} {'type': 'loss', 'content': 0.13964730501174927, 'timestamp': '2025-09-30 22:25:43.211176', 'step': 11081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:43.242992', 'step': 11081, 'epoch': 2} {'type': 'loss', 'content': 0.07579878717660904, 'timestamp': '2025-09-30 22:25:43.247231', 'step': 11082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.286725', 'step': 11082, 'epoch': 2} {'type': 'loss', 'content': 0.10538053512573242, 'timestamp': '2025-09-30 22:25:43.291037', 'step': 11083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.323156', 'step': 11083, 'epoch': 2} {'type': 'loss', 'content': 0.15086348354816437, 'timestamp': '2025-09-30 22:25:43.352736', 'step': 11084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.396977', 'step': 11084, 'epoch': 2} {'type': 'loss', 'content': 0.11061209440231323, 'timestamp': '2025-09-30 22:25:43.401438', 'step': 11085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:43.435098', 'step': 11085, 'epoch': 2} {'type': 'loss', 'content': 0.03668675199151039, 'timestamp': '2025-09-30 22:25:43.439880', 'step': 11086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.478913', 'step': 11086, 'epoch': 2} {'type': 'loss', 'content': 0.0596817210316658, 'timestamp': '2025-09-30 22:25:43.482442', 'step': 11087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.514855', 'step': 11087, 'epoch': 2} {'type': 'loss', 'content': 0.04979102686047554, 'timestamp': '2025-09-30 22:25:43.538977', 'step': 11088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.569013', 'step': 11088, 'epoch': 2} {'type': 'loss', 'content': 0.1645515263080597, 'timestamp': '2025-09-30 22:25:43.571627', 'step': 11089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:43.601787', 'step': 11089, 'epoch': 2} {'type': 'loss', 'content': 0.1320977509021759, 'timestamp': '2025-09-30 22:25:43.605473', 'step': 11090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.637304', 'step': 11090, 'epoch': 2} {'type': 'loss', 'content': 0.1382771134376526, 'timestamp': '2025-09-30 22:25:43.640240', 'step': 11091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.673014', 'step': 11091, 'epoch': 2} {'type': 'loss', 'content': 0.18154089152812958, 'timestamp': '2025-09-30 22:25:43.705535', 'step': 11092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.737982', 'step': 11092, 'epoch': 2} {'type': 'loss', 'content': 0.16293057799339294, 'timestamp': '2025-09-30 22:25:43.741236', 'step': 11093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:43.772744', 'step': 11093, 'epoch': 2} {'type': 'loss', 'content': 0.1122986227273941, 'timestamp': '2025-09-30 22:25:43.779111', 'step': 11094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:43.812071', 'step': 11094, 'epoch': 2} {'type': 'loss', 'content': 0.06960248947143555, 'timestamp': '2025-09-30 22:25:43.815524', 'step': 11095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:43.851804', 'step': 11095, 'epoch': 2} {'type': 'loss', 'content': 0.1391810029745102, 'timestamp': '2025-09-30 22:25:43.877647', 'step': 11096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:43.910966', 'step': 11096, 'epoch': 2} {'type': 'loss', 'content': 0.12015548348426819, 'timestamp': '2025-09-30 22:25:43.914129', 'step': 11097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:43.944808', 'step': 11097, 'epoch': 2} {'type': 'loss', 'content': 0.12037811428308487, 'timestamp': '2025-09-30 22:25:43.947871', 'step': 11098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:43.988167', 'step': 11098, 'epoch': 2} {'type': 'loss', 'content': 0.12983815371990204, 'timestamp': '2025-09-30 22:25:43.994852', 'step': 11099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:44.030703', 'step': 11099, 'epoch': 2} {'type': 'loss', 'content': 0.16247129440307617, 'timestamp': '2025-09-30 22:25:44.054884', 'step': 11100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:44.085780', 'step': 11100, 'epoch': 2} {'type': 'loss', 'content': 0.09711857885122299, 'timestamp': '2025-09-30 22:25:44.090607', 'step': 11101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.122733', 'step': 11101, 'epoch': 2} {'type': 'loss', 'content': 0.12663084268569946, 'timestamp': '2025-09-30 22:25:44.125697', 'step': 11102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:44.157278', 'step': 11102, 'epoch': 2} {'type': 'loss', 'content': 0.11122447997331619, 'timestamp': '2025-09-30 22:25:44.159365', 'step': 11103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:44.189089', 'step': 11103, 'epoch': 2} {'type': 'loss', 'content': 0.09033586084842682, 'timestamp': '2025-09-30 22:25:44.216916', 'step': 11104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:44.247914', 'step': 11104, 'epoch': 2} {'type': 'loss', 'content': 0.08335818350315094, 'timestamp': '2025-09-30 22:25:44.250888', 'step': 11105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:44.282390', 'step': 11105, 'epoch': 2} {'type': 'loss', 'content': 0.08858697861433029, 'timestamp': '2025-09-30 22:25:44.286421', 'step': 11106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:44.317244', 'step': 11106, 'epoch': 2} {'type': 'loss', 'content': 0.060551393777132034, 'timestamp': '2025-09-30 22:25:44.319595', 'step': 11107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:44.355742', 'step': 11107, 'epoch': 2} {'type': 'loss', 'content': 0.18981827795505524, 'timestamp': '2025-09-30 22:25:44.379825', 'step': 11108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:44.410156', 'step': 11108, 'epoch': 2} {'type': 'loss', 'content': 0.10853713750839233, 'timestamp': '2025-09-30 22:25:44.416917', 'step': 11109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:44.448336', 'step': 11109, 'epoch': 2} {'type': 'loss', 'content': 0.22907035052776337, 'timestamp': '2025-09-30 22:25:44.456005', 'step': 11110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.492837', 'step': 11110, 'epoch': 2} {'type': 'loss', 'content': 0.15796063840389252, 'timestamp': '2025-09-30 22:25:44.504595', 'step': 11111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-30 22:25:44.545444', 'step': 11111, 'epoch': 2} {'type': 'loss', 'content': 0.0922304317355156, 'timestamp': '2025-09-30 22:25:44.580182', 'step': 11112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:44.613762', 'step': 11112, 'epoch': 2} {'type': 'loss', 'content': 0.07707511633634567, 'timestamp': '2025-09-30 22:25:44.619531', 'step': 11113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.653304', 'step': 11113, 'epoch': 2} {'type': 'loss', 'content': 0.12207695096731186, 'timestamp': '2025-09-30 22:25:44.657051', 'step': 11114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.690512', 'step': 11114, 'epoch': 2} {'type': 'loss', 'content': 0.07195847481489182, 'timestamp': '2025-09-30 22:25:44.694975', 'step': 11115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.726791', 'step': 11115, 'epoch': 2} {'type': 'loss', 'content': 0.1243758499622345, 'timestamp': '2025-09-30 22:25:44.753537', 'step': 11116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:44.785819', 'step': 11116, 'epoch': 2} {'type': 'loss', 'content': 0.15475530922412872, 'timestamp': '2025-09-30 22:25:44.788110', 'step': 11117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.821463', 'step': 11117, 'epoch': 2} {'type': 'loss', 'content': 0.07280177623033524, 'timestamp': '2025-09-30 22:25:44.824946', 'step': 11118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:44.858529', 'step': 11118, 'epoch': 2} {'type': 'loss', 'content': 0.09713269025087357, 'timestamp': '2025-09-30 22:25:44.864347', 'step': 11119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:44.896772', 'step': 11119, 'epoch': 2} {'type': 'loss', 'content': 0.21775272488594055, 'timestamp': '2025-09-30 22:25:44.922876', 'step': 11120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:44.956793', 'step': 11120, 'epoch': 2} {'type': 'loss', 'content': 0.12389792501926422, 'timestamp': '2025-09-30 22:25:44.968452', 'step': 11121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:44.999612', 'step': 11121, 'epoch': 2} {'type': 'loss', 'content': 0.11983613669872284, 'timestamp': '2025-09-30 22:25:45.010502', 'step': 11122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:45.042299', 'step': 11122, 'epoch': 2} {'type': 'loss', 'content': 0.24526293575763702, 'timestamp': '2025-09-30 22:25:45.052410', 'step': 11123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:45.087240', 'step': 11123, 'epoch': 2} {'type': 'loss', 'content': 0.10791260749101639, 'timestamp': '2025-09-30 22:25:45.113213', 'step': 11124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:45.145201', 'step': 11124, 'epoch': 2} {'type': 'loss', 'content': 0.09217705577611923, 'timestamp': '2025-09-30 22:25:45.149427', 'step': 11125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:45.183149', 'step': 11125, 'epoch': 2} {'type': 'loss', 'content': 0.13702502846717834, 'timestamp': '2025-09-30 22:25:45.193843', 'step': 11126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:45.226817', 'step': 11126, 'epoch': 2} {'type': 'loss', 'content': 0.1305943727493286, 'timestamp': '2025-09-30 22:25:45.231965', 'step': 11127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:45.264458', 'step': 11127, 'epoch': 2} {'type': 'loss', 'content': 0.09087838232517242, 'timestamp': '2025-09-30 22:25:45.291444', 'step': 11128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:45.323775', 'step': 11128, 'epoch': 2} {'type': 'loss', 'content': 0.06032199040055275, 'timestamp': '2025-09-30 22:25:45.327331', 'step': 11129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:45.359291', 'step': 11129, 'epoch': 2} {'type': 'loss', 'content': 0.09903529286384583, 'timestamp': '2025-09-30 22:25:45.363618', 'step': 11130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:45.414088', 'step': 11130, 'epoch': 2} {'type': 'loss', 'content': 0.1181986927986145, 'timestamp': '2025-09-30 22:25:45.418210', 'step': 11131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:45.473048', 'step': 11131, 'epoch': 2} {'type': 'loss', 'content': 0.15673109889030457, 'timestamp': '2025-09-30 22:25:45.497548', 'step': 11132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:45.575503', 'step': 11132, 'epoch': 2} {'type': 'loss', 'content': 0.07339557260274887, 'timestamp': '2025-09-30 22:25:45.578547', 'step': 11133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:45.625653', 'step': 11133, 'epoch': 2} {'type': 'loss', 'content': 0.1217273399233818, 'timestamp': '2025-09-30 22:25:45.632506', 'step': 11134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:45.686855', 'step': 11134, 'epoch': 2} {'type': 'loss', 'content': 0.18501293659210205, 'timestamp': '2025-09-30 22:25:45.692890', 'step': 11135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:45.742462', 'step': 11135, 'epoch': 2} {'type': 'loss', 'content': 0.04332861676812172, 'timestamp': '2025-09-30 22:25:45.771531', 'step': 11136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:45.848939', 'step': 11136, 'epoch': 2} {'type': 'loss', 'content': 0.12804481387138367, 'timestamp': '2025-09-30 22:25:45.852803', 'step': 11137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:45.895814', 'step': 11137, 'epoch': 2} {'type': 'loss', 'content': 0.08824148774147034, 'timestamp': '2025-09-30 22:25:45.903547', 'step': 11138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:45.963536', 'step': 11138, 'epoch': 2} {'type': 'loss', 'content': 0.10998551547527313, 'timestamp': '2025-09-30 22:25:45.972851', 'step': 11139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.041826', 'step': 11139, 'epoch': 2} {'type': 'loss', 'content': 0.1530267894268036, 'timestamp': '2025-09-30 22:25:46.071931', 'step': 11140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:46.129982', 'step': 11140, 'epoch': 2} {'type': 'loss', 'content': 0.10909661650657654, 'timestamp': '2025-09-30 22:25:46.138010', 'step': 11141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.194664', 'step': 11141, 'epoch': 2} {'type': 'loss', 'content': 0.07647507637739182, 'timestamp': '2025-09-30 22:25:46.197456', 'step': 11142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.244684', 'step': 11142, 'epoch': 2} {'type': 'loss', 'content': 0.1462668478488922, 'timestamp': '2025-09-30 22:25:46.248805', 'step': 11143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.288127', 'step': 11143, 'epoch': 2} {'type': 'loss', 'content': 0.09767895936965942, 'timestamp': '2025-09-30 22:25:46.313775', 'step': 11144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:46.361673', 'step': 11144, 'epoch': 2} {'type': 'loss', 'content': 0.11994230002164841, 'timestamp': '2025-09-30 22:25:46.365765', 'step': 11145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.420673', 'step': 11145, 'epoch': 2} {'type': 'loss', 'content': 0.25690513849258423, 'timestamp': '2025-09-30 22:25:46.424807', 'step': 11146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:46.489755', 'step': 11146, 'epoch': 2} {'type': 'loss', 'content': 0.14585675299167633, 'timestamp': '2025-09-30 22:25:46.492410', 'step': 11147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.543470', 'step': 11147, 'epoch': 2} {'type': 'loss', 'content': 0.10911490023136139, 'timestamp': '2025-09-30 22:25:46.568091', 'step': 11148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:46.626181', 'step': 11148, 'epoch': 2} {'type': 'loss', 'content': 0.1185644343495369, 'timestamp': '2025-09-30 22:25:46.629146', 'step': 11149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:46.684412', 'step': 11149, 'epoch': 2} {'type': 'loss', 'content': 0.15002170205116272, 'timestamp': '2025-09-30 22:25:46.687409', 'step': 11150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:46.718535', 'step': 11150, 'epoch': 2} {'type': 'loss', 'content': 0.09018681943416595, 'timestamp': '2025-09-30 22:25:46.722550', 'step': 11151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:46.756771', 'step': 11151, 'epoch': 2} {'type': 'loss', 'content': 0.08470399677753448, 'timestamp': '2025-09-30 22:25:46.789596', 'step': 11152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:46.821963', 'step': 11152, 'epoch': 2} {'type': 'loss', 'content': 0.08806835860013962, 'timestamp': '2025-09-30 22:25:46.824633', 'step': 11153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.859048', 'step': 11153, 'epoch': 2} {'type': 'loss', 'content': 0.2003653645515442, 'timestamp': '2025-09-30 22:25:46.861453', 'step': 11154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.891569', 'step': 11154, 'epoch': 2} {'type': 'loss', 'content': 0.2171231359243393, 'timestamp': '2025-09-30 22:25:46.894012', 'step': 11155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:46.927808', 'step': 11155, 'epoch': 2} {'type': 'loss', 'content': 0.05602191016077995, 'timestamp': '2025-09-30 22:25:46.961669', 'step': 11156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:46.993746', 'step': 11156, 'epoch': 2} {'type': 'loss', 'content': 0.0317324697971344, 'timestamp': '2025-09-30 22:25:46.997352', 'step': 11157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.032153', 'step': 11157, 'epoch': 2} {'type': 'loss', 'content': 0.08947648853063583, 'timestamp': '2025-09-30 22:25:47.038395', 'step': 11158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.079185', 'step': 11158, 'epoch': 2} {'type': 'loss', 'content': 0.06169655919075012, 'timestamp': '2025-09-30 22:25:47.083399', 'step': 11159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:47.114394', 'step': 11159, 'epoch': 2} {'type': 'loss', 'content': 0.06292373687028885, 'timestamp': '2025-09-30 22:25:47.139909', 'step': 11160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:47.171170', 'step': 11160, 'epoch': 2} {'type': 'loss', 'content': 0.16285423934459686, 'timestamp': '2025-09-30 22:25:47.175116', 'step': 11161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.207161', 'step': 11161, 'epoch': 2} {'type': 'loss', 'content': 0.12426783889532089, 'timestamp': '2025-09-30 22:25:47.210671', 'step': 11162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:47.242650', 'step': 11162, 'epoch': 2} {'type': 'loss', 'content': 0.10765273123979568, 'timestamp': '2025-09-30 22:25:47.250853', 'step': 11163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:47.283832', 'step': 11163, 'epoch': 2} {'type': 'loss', 'content': 0.13333797454833984, 'timestamp': '2025-09-30 22:25:47.310666', 'step': 11164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:47.350929', 'step': 11164, 'epoch': 2} {'type': 'loss', 'content': 0.11624395102262497, 'timestamp': '2025-09-30 22:25:47.353429', 'step': 11165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.386479', 'step': 11165, 'epoch': 2} {'type': 'loss', 'content': 0.08131244778633118, 'timestamp': '2025-09-30 22:25:47.394224', 'step': 11166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.425818', 'step': 11166, 'epoch': 2} {'type': 'loss', 'content': 0.1846511960029602, 'timestamp': '2025-09-30 22:25:47.432507', 'step': 11167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.463541', 'step': 11167, 'epoch': 2} {'type': 'loss', 'content': 0.1355465054512024, 'timestamp': '2025-09-30 22:25:47.493042', 'step': 11168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:47.529365', 'step': 11168, 'epoch': 2} {'type': 'loss', 'content': 0.12262226641178131, 'timestamp': '2025-09-30 22:25:47.531608', 'step': 11169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:47.563349', 'step': 11169, 'epoch': 2} {'type': 'loss', 'content': 0.054630786180496216, 'timestamp': '2025-09-30 22:25:47.566037', 'step': 11170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:47.597445', 'step': 11170, 'epoch': 2} {'type': 'loss', 'content': 0.14900442957878113, 'timestamp': '2025-09-30 22:25:47.600001', 'step': 11171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:47.631331', 'step': 11171, 'epoch': 2} {'type': 'loss', 'content': 0.07484777271747589, 'timestamp': '2025-09-30 22:25:47.659067', 'step': 11172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.692394', 'step': 11172, 'epoch': 2} {'type': 'loss', 'content': 0.1504577100276947, 'timestamp': '2025-09-30 22:25:47.696369', 'step': 11173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.729983', 'step': 11173, 'epoch': 2} {'type': 'loss', 'content': 0.16594138741493225, 'timestamp': '2025-09-30 22:25:47.733200', 'step': 11174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:47.763761', 'step': 11174, 'epoch': 2} {'type': 'loss', 'content': 0.17613042891025543, 'timestamp': '2025-09-30 22:25:47.767757', 'step': 11175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.799734', 'step': 11175, 'epoch': 2} {'type': 'loss', 'content': 0.1971891075372696, 'timestamp': '2025-09-30 22:25:47.823870', 'step': 11176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.867009', 'step': 11176, 'epoch': 2} {'type': 'loss', 'content': 0.08007778227329254, 'timestamp': '2025-09-30 22:25:47.872352', 'step': 11177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:47.906321', 'step': 11177, 'epoch': 2} {'type': 'loss', 'content': 0.08715938031673431, 'timestamp': '2025-09-30 22:25:47.913607', 'step': 11178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:47.948168', 'step': 11178, 'epoch': 2} {'type': 'loss', 'content': 0.1257425993680954, 'timestamp': '2025-09-30 22:25:47.950635', 'step': 11179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:47.982594', 'step': 11179, 'epoch': 2} {'type': 'loss', 'content': 0.09033235162496567, 'timestamp': '2025-09-30 22:25:48.008392', 'step': 11180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:48.040946', 'step': 11180, 'epoch': 2} {'type': 'loss', 'content': 0.137965127825737, 'timestamp': '2025-09-30 22:25:48.053238', 'step': 11181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:48.095525', 'step': 11181, 'epoch': 2} {'type': 'loss', 'content': 0.07357193529605865, 'timestamp': '2025-09-30 22:25:48.098495', 'step': 11182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:48.129881', 'step': 11182, 'epoch': 2} {'type': 'loss', 'content': 0.1632959246635437, 'timestamp': '2025-09-30 22:25:48.133784', 'step': 11183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:48.164164', 'step': 11183, 'epoch': 2} {'type': 'loss', 'content': 0.118442103266716, 'timestamp': '2025-09-30 22:25:48.189423', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:25:55.962635', 'step': 11184, 'epoch': 2} {'type': 'pplx', 'content': 12146.95238193812, 'timestamp': '2025-09-30 22:25:55.968789', 'step': 11184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:55.998485', 'step': 11184, 'epoch': 2} {'type': 'loss', 'content': 0.13716579973697662, 'timestamp': '2025-09-30 22:25:56.009267', 'step': 11185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:56.044538', 'step': 11185, 'epoch': 2} {'type': 'loss', 'content': 0.11312772333621979, 'timestamp': '2025-09-30 22:25:56.049474', 'step': 11186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.090825', 'step': 11186, 'epoch': 2} {'type': 'loss', 'content': 0.118388831615448, 'timestamp': '2025-09-30 22:25:56.096587', 'step': 11187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.127330', 'step': 11187, 'epoch': 2} {'type': 'loss', 'content': 0.12603601813316345, 'timestamp': '2025-09-30 22:25:56.155419', 'step': 11188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:56.186911', 'step': 11188, 'epoch': 2} {'type': 'loss', 'content': 0.05845026671886444, 'timestamp': '2025-09-30 22:25:56.191675', 'step': 11189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:56.231430', 'step': 11189, 'epoch': 2} {'type': 'loss', 'content': 0.15372629463672638, 'timestamp': '2025-09-30 22:25:56.243570', 'step': 11190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.280188', 'step': 11190, 'epoch': 2} {'type': 'loss', 'content': 0.1134270504117012, 'timestamp': '2025-09-30 22:25:56.284482', 'step': 11191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.319498', 'step': 11191, 'epoch': 2} {'type': 'loss', 'content': 0.19933545589447021, 'timestamp': '2025-09-30 22:25:56.345376', 'step': 11192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.375872', 'step': 11192, 'epoch': 2} {'type': 'loss', 'content': 0.14601798355579376, 'timestamp': '2025-09-30 22:25:56.380807', 'step': 11193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.413830', 'step': 11193, 'epoch': 2} {'type': 'loss', 'content': 0.10462579131126404, 'timestamp': '2025-09-30 22:25:56.416674', 'step': 11194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:56.448816', 'step': 11194, 'epoch': 2} {'type': 'loss', 'content': 0.08885730057954788, 'timestamp': '2025-09-30 22:25:56.454027', 'step': 11195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.486198', 'step': 11195, 'epoch': 2} {'type': 'loss', 'content': 0.1337098628282547, 'timestamp': '2025-09-30 22:25:56.511936', 'step': 11196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.544448', 'step': 11196, 'epoch': 2} {'type': 'loss', 'content': 0.09191273152828217, 'timestamp': '2025-09-30 22:25:56.547992', 'step': 11197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.579766', 'step': 11197, 'epoch': 2} {'type': 'loss', 'content': 0.029599251225590706, 'timestamp': '2025-09-30 22:25:56.584237', 'step': 11198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:56.615703', 'step': 11198, 'epoch': 2} {'type': 'loss', 'content': 0.11308237165212631, 'timestamp': '2025-09-30 22:25:56.619620', 'step': 11199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:56.651212', 'step': 11199, 'epoch': 2} {'type': 'loss', 'content': 0.11753253638744354, 'timestamp': '2025-09-30 22:25:56.678156', 'step': 11200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:56.712636', 'step': 11200, 'epoch': 2} {'type': 'loss', 'content': 0.14945173263549805, 'timestamp': '2025-09-30 22:25:56.717397', 'step': 11201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:56.748559', 'step': 11201, 'epoch': 2} {'type': 'loss', 'content': 0.048823319375514984, 'timestamp': '2025-09-30 22:25:56.754673', 'step': 11202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.791039', 'step': 11202, 'epoch': 2} {'type': 'loss', 'content': 0.12533193826675415, 'timestamp': '2025-09-30 22:25:56.796851', 'step': 11203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:56.829759', 'step': 11203, 'epoch': 2} {'type': 'loss', 'content': 0.15672700107097626, 'timestamp': '2025-09-30 22:25:56.853991', 'step': 11204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.888591', 'step': 11204, 'epoch': 2} {'type': 'loss', 'content': 0.10138960927724838, 'timestamp': '2025-09-30 22:25:56.892237', 'step': 11205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.928185', 'step': 11205, 'epoch': 2} {'type': 'loss', 'content': 0.08642734587192535, 'timestamp': '2025-09-30 22:25:56.931660', 'step': 11206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:56.961936', 'step': 11206, 'epoch': 2} {'type': 'loss', 'content': 0.10118469595909119, 'timestamp': '2025-09-30 22:25:56.964704', 'step': 11207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:56.997157', 'step': 11207, 'epoch': 2} {'type': 'loss', 'content': 0.10690777003765106, 'timestamp': '2025-09-30 22:25:57.021873', 'step': 11208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.051578', 'step': 11208, 'epoch': 2} {'type': 'loss', 'content': 0.1382300704717636, 'timestamp': '2025-09-30 22:25:57.054324', 'step': 11209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:57.084549', 'step': 11209, 'epoch': 2} {'type': 'loss', 'content': 0.12752820551395416, 'timestamp': '2025-09-30 22:25:57.089537', 'step': 11210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.127268', 'step': 11210, 'epoch': 2} {'type': 'loss', 'content': 0.1370585709810257, 'timestamp': '2025-09-30 22:25:57.130819', 'step': 11211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:57.161021', 'step': 11211, 'epoch': 2} {'type': 'loss', 'content': 0.07493745535612106, 'timestamp': '2025-09-30 22:25:57.185509', 'step': 11212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.220308', 'step': 11212, 'epoch': 2} {'type': 'loss', 'content': 0.09787435829639435, 'timestamp': '2025-09-30 22:25:57.227775', 'step': 11213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.259372', 'step': 11213, 'epoch': 2} {'type': 'loss', 'content': 0.08458956331014633, 'timestamp': '2025-09-30 22:25:57.270645', 'step': 11214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:57.302610', 'step': 11214, 'epoch': 2} {'type': 'loss', 'content': 0.09789220988750458, 'timestamp': '2025-09-30 22:25:57.306119', 'step': 11215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:57.347420', 'step': 11215, 'epoch': 2} {'type': 'loss', 'content': 0.0978565439581871, 'timestamp': '2025-09-30 22:25:57.374471', 'step': 11216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.406681', 'step': 11216, 'epoch': 2} {'type': 'loss', 'content': 0.09151734411716461, 'timestamp': '2025-09-30 22:25:57.411128', 'step': 11217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:25:57.443576', 'step': 11217, 'epoch': 2} {'type': 'loss', 'content': 0.16535711288452148, 'timestamp': '2025-09-30 22:25:57.448682', 'step': 11218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:57.485545', 'step': 11218, 'epoch': 2} {'type': 'loss', 'content': 0.07686319947242737, 'timestamp': '2025-09-30 22:25:57.497492', 'step': 11219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:57.530299', 'step': 11219, 'epoch': 2} {'type': 'loss', 'content': 0.058273058384656906, 'timestamp': '2025-09-30 22:25:57.557725', 'step': 11220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:57.594057', 'step': 11220, 'epoch': 2} {'type': 'loss', 'content': 0.05175519362092018, 'timestamp': '2025-09-30 22:25:57.598717', 'step': 11221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:57.631523', 'step': 11221, 'epoch': 2} {'type': 'loss', 'content': 0.09455106407403946, 'timestamp': '2025-09-30 22:25:57.642974', 'step': 11222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:57.679504', 'step': 11222, 'epoch': 2} {'type': 'loss', 'content': 0.09772500395774841, 'timestamp': '2025-09-30 22:25:57.684014', 'step': 11223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.715341', 'step': 11223, 'epoch': 2} {'type': 'loss', 'content': 0.10384882986545563, 'timestamp': '2025-09-30 22:25:57.739477', 'step': 11224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:57.770848', 'step': 11224, 'epoch': 2} {'type': 'loss', 'content': 0.02756923995912075, 'timestamp': '2025-09-30 22:25:57.774647', 'step': 11225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:57.807535', 'step': 11225, 'epoch': 2} {'type': 'loss', 'content': 0.04210824891924858, 'timestamp': '2025-09-30 22:25:57.811532', 'step': 11226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:57.843184', 'step': 11226, 'epoch': 2} {'type': 'loss', 'content': 0.06418810039758682, 'timestamp': '2025-09-30 22:25:57.848646', 'step': 11227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:57.880099', 'step': 11227, 'epoch': 2} {'type': 'loss', 'content': 0.11860453337430954, 'timestamp': '2025-09-30 22:25:57.906463', 'step': 11228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:57.945064', 'step': 11228, 'epoch': 2} {'type': 'loss', 'content': 0.13519151508808136, 'timestamp': '2025-09-30 22:25:57.957273', 'step': 11229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:25:57.988115', 'step': 11229, 'epoch': 2} {'type': 'loss', 'content': 0.09587674587965012, 'timestamp': '2025-09-30 22:25:57.994532', 'step': 11230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.033961', 'step': 11230, 'epoch': 2} {'type': 'loss', 'content': 0.05549224838614464, 'timestamp': '2025-09-30 22:25:58.037597', 'step': 11231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.076302', 'step': 11231, 'epoch': 2} {'type': 'loss', 'content': 0.11416306346654892, 'timestamp': '2025-09-30 22:25:58.101343', 'step': 11232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.135503', 'step': 11232, 'epoch': 2} {'type': 'loss', 'content': 0.09850874543190002, 'timestamp': '2025-09-30 22:25:58.138371', 'step': 11233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:58.173860', 'step': 11233, 'epoch': 2} {'type': 'loss', 'content': 0.17720572650432587, 'timestamp': '2025-09-30 22:25:58.176453', 'step': 11234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.210040', 'step': 11234, 'epoch': 2} {'type': 'loss', 'content': 0.029629258438944817, 'timestamp': '2025-09-30 22:25:58.216119', 'step': 11235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:58.247164', 'step': 11235, 'epoch': 2} {'type': 'loss', 'content': 0.03587250038981438, 'timestamp': '2025-09-30 22:25:58.279455', 'step': 11236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:58.313959', 'step': 11236, 'epoch': 2} {'type': 'loss', 'content': 0.09018231183290482, 'timestamp': '2025-09-30 22:25:58.321795', 'step': 11237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.353260', 'step': 11237, 'epoch': 2} {'type': 'loss', 'content': 0.11563022434711456, 'timestamp': '2025-09-30 22:25:58.357955', 'step': 11238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:58.389159', 'step': 11238, 'epoch': 2} {'type': 'loss', 'content': 0.06782121956348419, 'timestamp': '2025-09-30 22:25:58.395000', 'step': 11239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:58.427894', 'step': 11239, 'epoch': 2} {'type': 'loss', 'content': 0.11549612134695053, 'timestamp': '2025-09-30 22:25:58.460520', 'step': 11240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:58.501911', 'step': 11240, 'epoch': 2} {'type': 'loss', 'content': 0.10227805376052856, 'timestamp': '2025-09-30 22:25:58.507228', 'step': 11241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:58.540319', 'step': 11241, 'epoch': 2} {'type': 'loss', 'content': 0.062083322554826736, 'timestamp': '2025-09-30 22:25:58.547394', 'step': 11242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.577996', 'step': 11242, 'epoch': 2} {'type': 'loss', 'content': 0.10525413602590561, 'timestamp': '2025-09-30 22:25:58.581521', 'step': 11243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.614852', 'step': 11243, 'epoch': 2} {'type': 'loss', 'content': 0.08621776103973389, 'timestamp': '2025-09-30 22:25:58.650408', 'step': 11244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:58.687895', 'step': 11244, 'epoch': 2} {'type': 'loss', 'content': 0.12711340188980103, 'timestamp': '2025-09-30 22:25:58.692719', 'step': 11245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.723979', 'step': 11245, 'epoch': 2} {'type': 'loss', 'content': 0.05674711987376213, 'timestamp': '2025-09-30 22:25:58.726619', 'step': 11246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:58.757829', 'step': 11246, 'epoch': 2} {'type': 'loss', 'content': 0.09403598308563232, 'timestamp': '2025-09-30 22:25:58.763051', 'step': 11247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:25:58.797773', 'step': 11247, 'epoch': 2} {'type': 'loss', 'content': 0.15861693024635315, 'timestamp': '2025-09-30 22:25:58.824583', 'step': 11248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:25:58.855715', 'step': 11248, 'epoch': 2} {'type': 'loss', 'content': 0.11895211040973663, 'timestamp': '2025-09-30 22:25:58.858920', 'step': 11249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:58.889290', 'step': 11249, 'epoch': 2} {'type': 'loss', 'content': 0.0984734445810318, 'timestamp': '2025-09-30 22:25:58.897338', 'step': 11250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:25:58.929054', 'step': 11250, 'epoch': 2} {'type': 'loss', 'content': 0.11008957773447037, 'timestamp': '2025-09-30 22:25:58.934804', 'step': 11251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:58.969401', 'step': 11251, 'epoch': 2} {'type': 'loss', 'content': 0.07578674703836441, 'timestamp': '2025-09-30 22:25:58.998015', 'step': 11252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:59.028223', 'step': 11252, 'epoch': 2} {'type': 'loss', 'content': 0.1738842874765396, 'timestamp': '2025-09-30 22:25:59.036926', 'step': 11253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:59.073029', 'step': 11253, 'epoch': 2} {'type': 'loss', 'content': 0.14194545149803162, 'timestamp': '2025-09-30 22:25:59.075981', 'step': 11254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.107925', 'step': 11254, 'epoch': 2} {'type': 'loss', 'content': 0.11181244254112244, 'timestamp': '2025-09-30 22:25:59.110927', 'step': 11255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.141265', 'step': 11255, 'epoch': 2} {'type': 'loss', 'content': 0.09004319459199905, 'timestamp': '2025-09-30 22:25:59.166343', 'step': 11256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.198056', 'step': 11256, 'epoch': 2} {'type': 'loss', 'content': 0.028677139431238174, 'timestamp': '2025-09-30 22:25:59.202040', 'step': 11257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:59.232509', 'step': 11257, 'epoch': 2} {'type': 'loss', 'content': 0.08496114611625671, 'timestamp': '2025-09-30 22:25:59.247234', 'step': 11258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.278817', 'step': 11258, 'epoch': 2} {'type': 'loss', 'content': 0.17345327138900757, 'timestamp': '2025-09-30 22:25:59.285461', 'step': 11259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.319601', 'step': 11259, 'epoch': 2} {'type': 'loss', 'content': 0.062467824667692184, 'timestamp': '2025-09-30 22:25:59.345940', 'step': 11260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.376476', 'step': 11260, 'epoch': 2} {'type': 'loss', 'content': 0.057700034230947495, 'timestamp': '2025-09-30 22:25:59.379521', 'step': 11261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.409624', 'step': 11261, 'epoch': 2} {'type': 'loss', 'content': 0.09433533996343613, 'timestamp': '2025-09-30 22:25:59.412850', 'step': 11262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.446770', 'step': 11262, 'epoch': 2} {'type': 'loss', 'content': 0.044580090790987015, 'timestamp': '2025-09-30 22:25:59.459496', 'step': 11263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.494268', 'step': 11263, 'epoch': 2} {'type': 'loss', 'content': 0.1587046980857849, 'timestamp': '2025-09-30 22:25:59.519494', 'step': 11264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.562554', 'step': 11264, 'epoch': 2} {'type': 'loss', 'content': 0.052268411964178085, 'timestamp': '2025-09-30 22:25:59.567142', 'step': 11265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.599055', 'step': 11265, 'epoch': 2} {'type': 'loss', 'content': 0.06909176707267761, 'timestamp': '2025-09-30 22:25:59.602405', 'step': 11266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.635724', 'step': 11266, 'epoch': 2} {'type': 'loss', 'content': 0.130576029419899, 'timestamp': '2025-09-30 22:25:59.641143', 'step': 11267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:59.678749', 'step': 11267, 'epoch': 2} {'type': 'loss', 'content': 0.021336520090699196, 'timestamp': '2025-09-30 22:25:59.707463', 'step': 11268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.741820', 'step': 11268, 'epoch': 2} {'type': 'loss', 'content': 0.13266335427761078, 'timestamp': '2025-09-30 22:25:59.746449', 'step': 11269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:59.782175', 'step': 11269, 'epoch': 2} {'type': 'loss', 'content': 0.07407236099243164, 'timestamp': '2025-09-30 22:25:59.786803', 'step': 11270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:25:59.822382', 'step': 11270, 'epoch': 2} {'type': 'loss', 'content': 0.08478696644306183, 'timestamp': '2025-09-30 22:25:59.834363', 'step': 11271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.870104', 'step': 11271, 'epoch': 2} {'type': 'loss', 'content': 0.12346140295267105, 'timestamp': '2025-09-30 22:25:59.896292', 'step': 11272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:25:59.929309', 'step': 11272, 'epoch': 2} {'type': 'loss', 'content': 0.11474400013685226, 'timestamp': '2025-09-30 22:25:59.933389', 'step': 11273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:25:59.966469', 'step': 11273, 'epoch': 2} {'type': 'loss', 'content': 0.029167018830776215, 'timestamp': '2025-09-30 22:25:59.974231', 'step': 11274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.008796', 'step': 11274, 'epoch': 2} {'type': 'loss', 'content': 0.11171218752861023, 'timestamp': '2025-09-30 22:26:00.011500', 'step': 11275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:00.043160', 'step': 11275, 'epoch': 2} {'type': 'loss', 'content': 0.11594752222299576, 'timestamp': '2025-09-30 22:26:00.070740', 'step': 11276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.104009', 'step': 11276, 'epoch': 2} {'type': 'loss', 'content': 0.11101172119379044, 'timestamp': '2025-09-30 22:26:00.107452', 'step': 11277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:00.140585', 'step': 11277, 'epoch': 2} {'type': 'loss', 'content': 0.04236390069127083, 'timestamp': '2025-09-30 22:26:00.145459', 'step': 11278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:00.182575', 'step': 11278, 'epoch': 2} {'type': 'loss', 'content': 0.18609045445919037, 'timestamp': '2025-09-30 22:26:00.188906', 'step': 11279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.221279', 'step': 11279, 'epoch': 2} {'type': 'loss', 'content': 0.12679721415042877, 'timestamp': '2025-09-30 22:26:00.246982', 'step': 11280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.277923', 'step': 11280, 'epoch': 2} {'type': 'loss', 'content': 0.08636166155338287, 'timestamp': '2025-09-30 22:26:00.283986', 'step': 11281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.318553', 'step': 11281, 'epoch': 2} {'type': 'loss', 'content': 0.12288103252649307, 'timestamp': '2025-09-30 22:26:00.322106', 'step': 11282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.353580', 'step': 11282, 'epoch': 2} {'type': 'loss', 'content': 0.0400443896651268, 'timestamp': '2025-09-30 22:26:00.358028', 'step': 11283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.391383', 'step': 11283, 'epoch': 2} {'type': 'loss', 'content': 0.13508953154087067, 'timestamp': '2025-09-30 22:26:00.417937', 'step': 11284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.451712', 'step': 11284, 'epoch': 2} {'type': 'loss', 'content': 0.08706445246934891, 'timestamp': '2025-09-30 22:26:00.457756', 'step': 11285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:00.491163', 'step': 11285, 'epoch': 2} {'type': 'loss', 'content': 0.1451447308063507, 'timestamp': '2025-09-30 22:26:00.495322', 'step': 11286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.529405', 'step': 11286, 'epoch': 2} {'type': 'loss', 'content': 0.059227053076028824, 'timestamp': '2025-09-30 22:26:00.536351', 'step': 11287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.570486', 'step': 11287, 'epoch': 2} {'type': 'loss', 'content': 0.16990526020526886, 'timestamp': '2025-09-30 22:26:00.599715', 'step': 11288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:00.632381', 'step': 11288, 'epoch': 2} {'type': 'loss', 'content': 0.08123577386140823, 'timestamp': '2025-09-30 22:26:00.637494', 'step': 11289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.670747', 'step': 11289, 'epoch': 2} {'type': 'loss', 'content': 0.07719840109348297, 'timestamp': '2025-09-30 22:26:00.675910', 'step': 11290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.709399', 'step': 11290, 'epoch': 2} {'type': 'loss', 'content': 0.07105648517608643, 'timestamp': '2025-09-30 22:26:00.712342', 'step': 11291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:00.746816', 'step': 11291, 'epoch': 2} {'type': 'loss', 'content': 0.12156601250171661, 'timestamp': '2025-09-30 22:26:00.773452', 'step': 11292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.804940', 'step': 11292, 'epoch': 2} {'type': 'loss', 'content': 0.1296864151954651, 'timestamp': '2025-09-30 22:26:00.810486', 'step': 11293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:00.845222', 'step': 11293, 'epoch': 2} {'type': 'loss', 'content': 0.15079577267169952, 'timestamp': '2025-09-30 22:26:00.849201', 'step': 11294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.898844', 'step': 11294, 'epoch': 2} {'type': 'loss', 'content': 0.1346348226070404, 'timestamp': '2025-09-30 22:26:00.919498', 'step': 11295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:00.967170', 'step': 11295, 'epoch': 2} {'type': 'loss', 'content': 0.1205810084939003, 'timestamp': '2025-09-30 22:26:01.008835', 'step': 11296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.041679', 'step': 11296, 'epoch': 2} {'type': 'loss', 'content': 0.12507782876491547, 'timestamp': '2025-09-30 22:26:01.046782', 'step': 11297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.092099', 'step': 11297, 'epoch': 2} {'type': 'loss', 'content': 0.08856985718011856, 'timestamp': '2025-09-30 22:26:01.095479', 'step': 11298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.129503', 'step': 11298, 'epoch': 2} {'type': 'loss', 'content': 0.09763574600219727, 'timestamp': '2025-09-30 22:26:01.135048', 'step': 11299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.169578', 'step': 11299, 'epoch': 2} {'type': 'loss', 'content': 0.13266466557979584, 'timestamp': '2025-09-30 22:26:01.196159', 'step': 11300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:01.228863', 'step': 11300, 'epoch': 2} {'type': 'loss', 'content': 0.17967839539051056, 'timestamp': '2025-09-30 22:26:01.232962', 'step': 11301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.267495', 'step': 11301, 'epoch': 2} {'type': 'loss', 'content': 0.07019944489002228, 'timestamp': '2025-09-30 22:26:01.274074', 'step': 11302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.306750', 'step': 11302, 'epoch': 2} {'type': 'loss', 'content': 0.11837323755025864, 'timestamp': '2025-09-30 22:26:01.323243', 'step': 11303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:01.356822', 'step': 11303, 'epoch': 2} {'type': 'loss', 'content': 0.1557970643043518, 'timestamp': '2025-09-30 22:26:01.387322', 'step': 11304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.418686', 'step': 11304, 'epoch': 2} {'type': 'loss', 'content': 0.1858334243297577, 'timestamp': '2025-09-30 22:26:01.422851', 'step': 11305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:01.454576', 'step': 11305, 'epoch': 2} {'type': 'loss', 'content': 0.0832560807466507, 'timestamp': '2025-09-30 22:26:01.458550', 'step': 11306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.511368', 'step': 11306, 'epoch': 2} {'type': 'loss', 'content': 0.17176443338394165, 'timestamp': '2025-09-30 22:26:01.515706', 'step': 11307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.549791', 'step': 11307, 'epoch': 2} {'type': 'loss', 'content': 0.20329247415065765, 'timestamp': '2025-09-30 22:26:01.574541', 'step': 11308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.619291', 'step': 11308, 'epoch': 2} {'type': 'loss', 'content': 0.15207794308662415, 'timestamp': '2025-09-30 22:26:01.624588', 'step': 11309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.655909', 'step': 11309, 'epoch': 2} {'type': 'loss', 'content': 0.10600924491882324, 'timestamp': '2025-09-30 22:26:01.660645', 'step': 11310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.692418', 'step': 11310, 'epoch': 2} {'type': 'loss', 'content': 0.11247666925191879, 'timestamp': '2025-09-30 22:26:01.697855', 'step': 11311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.747169', 'step': 11311, 'epoch': 2} {'type': 'loss', 'content': 0.08011762797832489, 'timestamp': '2025-09-30 22:26:01.775055', 'step': 11312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:01.806641', 'step': 11312, 'epoch': 2} {'type': 'loss', 'content': 0.1557379513978958, 'timestamp': '2025-09-30 22:26:01.811402', 'step': 11313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:01.855002', 'step': 11313, 'epoch': 2} {'type': 'loss', 'content': 0.1633201390504837, 'timestamp': '2025-09-30 22:26:01.860855', 'step': 11314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:01.898830', 'step': 11314, 'epoch': 2} {'type': 'loss', 'content': 0.1496136635541916, 'timestamp': '2025-09-30 22:26:01.906006', 'step': 11315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:01.946061', 'step': 11315, 'epoch': 2} {'type': 'loss', 'content': 0.12704689800739288, 'timestamp': '2025-09-30 22:26:01.973510', 'step': 11316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.013429', 'step': 11316, 'epoch': 2} {'type': 'loss', 'content': 0.13414055109024048, 'timestamp': '2025-09-30 22:26:02.019152', 'step': 11317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.066102', 'step': 11317, 'epoch': 2} {'type': 'loss', 'content': 0.11488375812768936, 'timestamp': '2025-09-30 22:26:02.077380', 'step': 11318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:02.109747', 'step': 11318, 'epoch': 2} {'type': 'loss', 'content': 0.06249095872044563, 'timestamp': '2025-09-30 22:26:02.113707', 'step': 11319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:02.149302', 'step': 11319, 'epoch': 2} {'type': 'loss', 'content': 0.060647670179605484, 'timestamp': '2025-09-30 22:26:02.175845', 'step': 11320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.223903', 'step': 11320, 'epoch': 2} {'type': 'loss', 'content': 0.18943016231060028, 'timestamp': '2025-09-30 22:26:02.228957', 'step': 11321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.278572', 'step': 11321, 'epoch': 2} {'type': 'loss', 'content': 0.125050887465477, 'timestamp': '2025-09-30 22:26:02.283509', 'step': 11322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:02.315318', 'step': 11322, 'epoch': 2} {'type': 'loss', 'content': 0.0974065512418747, 'timestamp': '2025-09-30 22:26:02.319975', 'step': 11323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:02.350056', 'step': 11323, 'epoch': 2} {'type': 'loss', 'content': 0.04914594441652298, 'timestamp': '2025-09-30 22:26:02.375336', 'step': 11324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:02.408071', 'step': 11324, 'epoch': 2} {'type': 'loss', 'content': 0.19056372344493866, 'timestamp': '2025-09-30 22:26:02.417714', 'step': 11325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:02.455518', 'step': 11325, 'epoch': 2} {'type': 'loss', 'content': 0.1969711184501648, 'timestamp': '2025-09-30 22:26:02.460583', 'step': 11326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.492917', 'step': 11326, 'epoch': 2} {'type': 'loss', 'content': 0.05914028361439705, 'timestamp': '2025-09-30 22:26:02.497097', 'step': 11327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:02.550778', 'step': 11327, 'epoch': 2} {'type': 'loss', 'content': 0.040934234857559204, 'timestamp': '2025-09-30 22:26:02.575656', 'step': 11328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.606056', 'step': 11328, 'epoch': 2} {'type': 'loss', 'content': 0.11281910538673401, 'timestamp': '2025-09-30 22:26:02.610704', 'step': 11329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:02.643484', 'step': 11329, 'epoch': 2} {'type': 'loss', 'content': 0.1355958729982376, 'timestamp': '2025-09-30 22:26:02.652614', 'step': 11330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:02.690010', 'step': 11330, 'epoch': 2} {'type': 'loss', 'content': 0.056691840291023254, 'timestamp': '2025-09-30 22:26:02.697590', 'step': 11331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:02.731274', 'step': 11331, 'epoch': 2} {'type': 'loss', 'content': 0.23033154010772705, 'timestamp': '2025-09-30 22:26:02.759345', 'step': 11332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:02.801155', 'step': 11332, 'epoch': 2} {'type': 'loss', 'content': 0.18361511826515198, 'timestamp': '2025-09-30 22:26:02.804240', 'step': 11333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:02.836028', 'step': 11333, 'epoch': 2} {'type': 'loss', 'content': 0.112602598965168, 'timestamp': '2025-09-30 22:26:02.839625', 'step': 11334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:02.871866', 'step': 11334, 'epoch': 2} {'type': 'loss', 'content': 0.05617828667163849, 'timestamp': '2025-09-30 22:26:02.876262', 'step': 11335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:02.910459', 'step': 11335, 'epoch': 2} {'type': 'loss', 'content': 0.05337819084525108, 'timestamp': '2025-09-30 22:26:02.942730', 'step': 11336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:02.977585', 'step': 11336, 'epoch': 2} {'type': 'loss', 'content': 0.05980068817734718, 'timestamp': '2025-09-30 22:26:02.988610', 'step': 11337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.033990', 'step': 11337, 'epoch': 2} {'type': 'loss', 'content': 0.0586063377559185, 'timestamp': '2025-09-30 22:26:03.038521', 'step': 11338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:03.070579', 'step': 11338, 'epoch': 2} {'type': 'loss', 'content': 0.14002886414527893, 'timestamp': '2025-09-30 22:26:03.078116', 'step': 11339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:03.111680', 'step': 11339, 'epoch': 2} {'type': 'loss', 'content': 0.07057231664657593, 'timestamp': '2025-09-30 22:26:03.151611', 'step': 11340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:03.189260', 'step': 11340, 'epoch': 2} {'type': 'loss', 'content': 0.13252678513526917, 'timestamp': '2025-09-30 22:26:03.193338', 'step': 11341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.227028', 'step': 11341, 'epoch': 2} {'type': 'loss', 'content': 0.11239694058895111, 'timestamp': '2025-09-30 22:26:03.231134', 'step': 11342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.262653', 'step': 11342, 'epoch': 2} {'type': 'loss', 'content': 0.06495292484760284, 'timestamp': '2025-09-30 22:26:03.267696', 'step': 11343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.300745', 'step': 11343, 'epoch': 2} {'type': 'loss', 'content': 0.1610691249370575, 'timestamp': '2025-09-30 22:26:03.339428', 'step': 11344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.372717', 'step': 11344, 'epoch': 2} {'type': 'loss', 'content': 0.14595341682434082, 'timestamp': '2025-09-30 22:26:03.381837', 'step': 11345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.426562', 'step': 11345, 'epoch': 2} {'type': 'loss', 'content': 0.06905759125947952, 'timestamp': '2025-09-30 22:26:03.431153', 'step': 11346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:03.462909', 'step': 11346, 'epoch': 2} {'type': 'loss', 'content': 0.10015121847391129, 'timestamp': '2025-09-30 22:26:03.466600', 'step': 11347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.498154', 'step': 11347, 'epoch': 2} {'type': 'loss', 'content': 0.13617432117462158, 'timestamp': '2025-09-30 22:26:03.522615', 'step': 11348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.555331', 'step': 11348, 'epoch': 2} {'type': 'loss', 'content': 0.13698932528495789, 'timestamp': '2025-09-30 22:26:03.558950', 'step': 11349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.592056', 'step': 11349, 'epoch': 2} {'type': 'loss', 'content': 0.14445139467716217, 'timestamp': '2025-09-30 22:26:03.595472', 'step': 11350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.628846', 'step': 11350, 'epoch': 2} {'type': 'loss', 'content': 0.10605856776237488, 'timestamp': '2025-09-30 22:26:03.636551', 'step': 11351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:03.669446', 'step': 11351, 'epoch': 2} {'type': 'loss', 'content': 0.1533539593219757, 'timestamp': '2025-09-30 22:26:03.694410', 'step': 11352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.731729', 'step': 11352, 'epoch': 2} {'type': 'loss', 'content': 0.051413800567388535, 'timestamp': '2025-09-30 22:26:03.736170', 'step': 11353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:03.769049', 'step': 11353, 'epoch': 2} {'type': 'loss', 'content': 0.11239632219076157, 'timestamp': '2025-09-30 22:26:03.773312', 'step': 11354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:03.804713', 'step': 11354, 'epoch': 2} {'type': 'loss', 'content': 0.12751442193984985, 'timestamp': '2025-09-30 22:26:03.815237', 'step': 11355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.861480', 'step': 11355, 'epoch': 2} {'type': 'loss', 'content': 0.08499442785978317, 'timestamp': '2025-09-30 22:26:03.893887', 'step': 11356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:03.935578', 'step': 11356, 'epoch': 2} {'type': 'loss', 'content': 0.11175978183746338, 'timestamp': '2025-09-30 22:26:03.947263', 'step': 11357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:03.978751', 'step': 11357, 'epoch': 2} {'type': 'loss', 'content': 0.0948060154914856, 'timestamp': '2025-09-30 22:26:03.981731', 'step': 11358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:04.013164', 'step': 11358, 'epoch': 2} {'type': 'loss', 'content': 0.09144040942192078, 'timestamp': '2025-09-30 22:26:04.017028', 'step': 11359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:04.047832', 'step': 11359, 'epoch': 2} {'type': 'loss', 'content': 0.1511705219745636, 'timestamp': '2025-09-30 22:26:04.073347', 'step': 11360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:04.106377', 'step': 11360, 'epoch': 2} {'type': 'loss', 'content': 0.1486176699399948, 'timestamp': '2025-09-30 22:26:04.109610', 'step': 11361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:04.140886', 'step': 11361, 'epoch': 2} {'type': 'loss', 'content': 0.059756796807050705, 'timestamp': '2025-09-30 22:26:04.143502', 'step': 11362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:04.173908', 'step': 11362, 'epoch': 2} {'type': 'loss', 'content': 0.08367817848920822, 'timestamp': '2025-09-30 22:26:04.177209', 'step': 11363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:04.207318', 'step': 11363, 'epoch': 2} {'type': 'loss', 'content': 0.06462627649307251, 'timestamp': '2025-09-30 22:26:04.232583', 'step': 11364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:04.263161', 'step': 11364, 'epoch': 2} {'type': 'loss', 'content': 0.11376067996025085, 'timestamp': '2025-09-30 22:26:04.265727', 'step': 11365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:04.298396', 'step': 11365, 'epoch': 2} {'type': 'loss', 'content': 0.09285006672143936, 'timestamp': '2025-09-30 22:26:04.306245', 'step': 11366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:04.338573', 'step': 11366, 'epoch': 2} {'type': 'loss', 'content': 0.12349416315555573, 'timestamp': '2025-09-30 22:26:04.341272', 'step': 11367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:04.378752', 'step': 11367, 'epoch': 2} {'type': 'loss', 'content': 0.050738610327243805, 'timestamp': '2025-09-30 22:26:04.404839', 'step': 11368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:04.437091', 'step': 11368, 'epoch': 2} {'type': 'loss', 'content': 0.14261114597320557, 'timestamp': '2025-09-30 22:26:04.440058', 'step': 11369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:04.473414', 'step': 11369, 'epoch': 2} {'type': 'loss', 'content': 0.1369011551141739, 'timestamp': '2025-09-30 22:26:04.476329', 'step': 11370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:04.506955', 'step': 11370, 'epoch': 2} {'type': 'loss', 'content': 0.08839230984449387, 'timestamp': '2025-09-30 22:26:04.510786', 'step': 11371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:04.541607', 'step': 11371, 'epoch': 2} {'type': 'loss', 'content': 0.08940660953521729, 'timestamp': '2025-09-30 22:26:04.568004', 'step': 11372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:04.599455', 'step': 11372, 'epoch': 2} {'type': 'loss', 'content': 0.08737123757600784, 'timestamp': '2025-09-30 22:26:04.610408', 'step': 11373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:04.649063', 'step': 11373, 'epoch': 2} {'type': 'loss', 'content': 0.06494182348251343, 'timestamp': '2025-09-30 22:26:04.660256', 'step': 11374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:04.699308', 'step': 11374, 'epoch': 2} {'type': 'loss', 'content': 0.16143417358398438, 'timestamp': '2025-09-30 22:26:04.709685', 'step': 11375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:04.750411', 'step': 11375, 'epoch': 2} {'type': 'loss', 'content': 0.04964820668101311, 'timestamp': '2025-09-30 22:26:04.783759', 'step': 11376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:04.822187', 'step': 11376, 'epoch': 2} {'type': 'loss', 'content': 0.142242431640625, 'timestamp': '2025-09-30 22:26:04.825481', 'step': 11377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:04.857597', 'step': 11377, 'epoch': 2} {'type': 'loss', 'content': 0.1961018294095993, 'timestamp': '2025-09-30 22:26:04.863804', 'step': 11378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:04.896134', 'step': 11378, 'epoch': 2} {'type': 'loss', 'content': 0.22537538409233093, 'timestamp': '2025-09-30 22:26:04.905206', 'step': 11379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:04.937329', 'step': 11379, 'epoch': 2} {'type': 'loss', 'content': 0.12547166645526886, 'timestamp': '2025-09-30 22:26:04.962223', 'step': 11380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:04.992833', 'step': 11380, 'epoch': 2} {'type': 'loss', 'content': 0.09646914899349213, 'timestamp': '2025-09-30 22:26:04.996258', 'step': 11381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.026962', 'step': 11381, 'epoch': 2} {'type': 'loss', 'content': 0.13619191944599152, 'timestamp': '2025-09-30 22:26:05.029992', 'step': 11382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:05.064508', 'step': 11382, 'epoch': 2} {'type': 'loss', 'content': 0.10175801813602448, 'timestamp': '2025-09-30 22:26:05.067561', 'step': 11383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.098768', 'step': 11383, 'epoch': 2} {'type': 'loss', 'content': 0.1014455035328865, 'timestamp': '2025-09-30 22:26:05.133208', 'step': 11384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.164800', 'step': 11384, 'epoch': 2} {'type': 'loss', 'content': 0.07247625291347504, 'timestamp': '2025-09-30 22:26:05.174298', 'step': 11385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.208737', 'step': 11385, 'epoch': 2} {'type': 'loss', 'content': 0.14356811344623566, 'timestamp': '2025-09-30 22:26:05.211753', 'step': 11386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.242269', 'step': 11386, 'epoch': 2} {'type': 'loss', 'content': 0.05046659708023071, 'timestamp': '2025-09-30 22:26:05.247508', 'step': 11387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.282025', 'step': 11387, 'epoch': 2} {'type': 'loss', 'content': 0.08079420030117035, 'timestamp': '2025-09-30 22:26:05.307194', 'step': 11388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.338395', 'step': 11388, 'epoch': 2} {'type': 'loss', 'content': 0.07978176325559616, 'timestamp': '2025-09-30 22:26:05.341667', 'step': 11389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:05.373102', 'step': 11389, 'epoch': 2} {'type': 'loss', 'content': 0.07162396609783173, 'timestamp': '2025-09-30 22:26:05.385608', 'step': 11390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.425045', 'step': 11390, 'epoch': 2} {'type': 'loss', 'content': 0.1699850857257843, 'timestamp': '2025-09-30 22:26:05.428018', 'step': 11391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.464809', 'step': 11391, 'epoch': 2} {'type': 'loss', 'content': 0.10141758620738983, 'timestamp': '2025-09-30 22:26:05.488865', 'step': 11392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:05.519760', 'step': 11392, 'epoch': 2} {'type': 'loss', 'content': 0.10746783763170242, 'timestamp': '2025-09-30 22:26:05.531757', 'step': 11393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.562183', 'step': 11393, 'epoch': 2} {'type': 'loss', 'content': 0.09545595943927765, 'timestamp': '2025-09-30 22:26:05.565218', 'step': 11394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.597188', 'step': 11394, 'epoch': 2} {'type': 'loss', 'content': 0.1534363031387329, 'timestamp': '2025-09-30 22:26:05.607748', 'step': 11395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.640922', 'step': 11395, 'epoch': 2} {'type': 'loss', 'content': 0.042487047612667084, 'timestamp': '2025-09-30 22:26:05.665497', 'step': 11396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.700220', 'step': 11396, 'epoch': 2} {'type': 'loss', 'content': 0.13249115645885468, 'timestamp': '2025-09-30 22:26:05.702657', 'step': 11397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:05.734170', 'step': 11397, 'epoch': 2} {'type': 'loss', 'content': 0.13529884815216064, 'timestamp': '2025-09-30 22:26:05.739334', 'step': 11398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:05.770938', 'step': 11398, 'epoch': 2} {'type': 'loss', 'content': 0.11222641170024872, 'timestamp': '2025-09-30 22:26:05.774042', 'step': 11399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:05.805258', 'step': 11399, 'epoch': 2} {'type': 'loss', 'content': 0.13174307346343994, 'timestamp': '2025-09-30 22:26:05.829980', 'step': 11400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:05.860753', 'step': 11400, 'epoch': 2} {'type': 'loss', 'content': 0.12912434339523315, 'timestamp': '2025-09-30 22:26:05.864397', 'step': 11401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.896511', 'step': 11401, 'epoch': 2} {'type': 'loss', 'content': 0.15830284357070923, 'timestamp': '2025-09-30 22:26:05.900586', 'step': 11402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:05.931773', 'step': 11402, 'epoch': 2} {'type': 'loss', 'content': 0.12883293628692627, 'timestamp': '2025-09-30 22:26:05.935193', 'step': 11403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:05.975590', 'step': 11403, 'epoch': 2} {'type': 'loss', 'content': 0.16466325521469116, 'timestamp': '2025-09-30 22:26:06.009296', 'step': 11404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:06.040688', 'step': 11404, 'epoch': 2} {'type': 'loss', 'content': 0.13648512959480286, 'timestamp': '2025-09-30 22:26:06.052390', 'step': 11405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:06.086427', 'step': 11405, 'epoch': 2} {'type': 'loss', 'content': 0.14864127337932587, 'timestamp': '2025-09-30 22:26:06.089731', 'step': 11406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:06.121578', 'step': 11406, 'epoch': 2} {'type': 'loss', 'content': 0.1008404865860939, 'timestamp': '2025-09-30 22:26:06.132880', 'step': 11407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.173111', 'step': 11407, 'epoch': 2} {'type': 'loss', 'content': 0.1561398208141327, 'timestamp': '2025-09-30 22:26:06.205821', 'step': 11408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:06.237180', 'step': 11408, 'epoch': 2} {'type': 'loss', 'content': 0.08711964637041092, 'timestamp': '2025-09-30 22:26:06.250990', 'step': 11409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.291680', 'step': 11409, 'epoch': 2} {'type': 'loss', 'content': 0.11945119500160217, 'timestamp': '2025-09-30 22:26:06.295051', 'step': 11410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.326105', 'step': 11410, 'epoch': 2} {'type': 'loss', 'content': 0.2259361445903778, 'timestamp': '2025-09-30 22:26:06.330882', 'step': 11411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:06.361931', 'step': 11411, 'epoch': 2} {'type': 'loss', 'content': 0.09341055899858475, 'timestamp': '2025-09-30 22:26:06.394346', 'step': 11412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.426653', 'step': 11412, 'epoch': 2} {'type': 'loss', 'content': 0.14951996505260468, 'timestamp': '2025-09-30 22:26:06.437358', 'step': 11413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:06.477163', 'step': 11413, 'epoch': 2} {'type': 'loss', 'content': 0.03493836149573326, 'timestamp': '2025-09-30 22:26:06.479914', 'step': 11414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:06.516756', 'step': 11414, 'epoch': 2} {'type': 'loss', 'content': 0.07169803231954575, 'timestamp': '2025-09-30 22:26:06.524796', 'step': 11415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.562374', 'step': 11415, 'epoch': 2} {'type': 'loss', 'content': 0.0560186542570591, 'timestamp': '2025-09-30 22:26:06.594288', 'step': 11416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:06.630819', 'step': 11416, 'epoch': 2} {'type': 'loss', 'content': 0.07082716375589371, 'timestamp': '2025-09-30 22:26:06.639523', 'step': 11417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:06.678557', 'step': 11417, 'epoch': 2} {'type': 'loss', 'content': 0.14954808354377747, 'timestamp': '2025-09-30 22:26:06.684921', 'step': 11418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.715846', 'step': 11418, 'epoch': 2} {'type': 'loss', 'content': 0.10305476933717728, 'timestamp': '2025-09-30 22:26:06.718759', 'step': 11419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:06.749340', 'step': 11419, 'epoch': 2} {'type': 'loss', 'content': 0.05624648928642273, 'timestamp': '2025-09-30 22:26:06.781497', 'step': 11420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:06.812361', 'step': 11420, 'epoch': 2} {'type': 'loss', 'content': 0.1501762717962265, 'timestamp': '2025-09-30 22:26:06.817486', 'step': 11421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:06.848167', 'step': 11421, 'epoch': 2} {'type': 'loss', 'content': 0.08362039923667908, 'timestamp': '2025-09-30 22:26:06.851953', 'step': 11422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:06.882827', 'step': 11422, 'epoch': 2} {'type': 'loss', 'content': 0.11881145089864731, 'timestamp': '2025-09-30 22:26:06.893735', 'step': 11423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:06.934537', 'step': 11423, 'epoch': 2} {'type': 'loss', 'content': 0.08955436199903488, 'timestamp': '2025-09-30 22:26:06.968097', 'step': 11424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:07.005133', 'step': 11424, 'epoch': 2} {'type': 'loss', 'content': 0.1136169359087944, 'timestamp': '2025-09-30 22:26:07.015456', 'step': 11425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:07.059925', 'step': 11425, 'epoch': 2} {'type': 'loss', 'content': 0.22425350546836853, 'timestamp': '2025-09-30 22:26:07.069760', 'step': 11426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:07.106265', 'step': 11426, 'epoch': 2} {'type': 'loss', 'content': 0.06078783795237541, 'timestamp': '2025-09-30 22:26:07.109812', 'step': 11427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:07.144275', 'step': 11427, 'epoch': 2} {'type': 'loss', 'content': 0.07132240384817123, 'timestamp': '2025-09-30 22:26:07.174093', 'step': 11428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:07.211604', 'step': 11428, 'epoch': 2} {'type': 'loss', 'content': 0.20609109103679657, 'timestamp': '2025-09-30 22:26:07.220059', 'step': 11429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:07.256424', 'step': 11429, 'epoch': 2} {'type': 'loss', 'content': 0.16135254502296448, 'timestamp': '2025-09-30 22:26:07.264782', 'step': 11430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:07.300809', 'step': 11430, 'epoch': 2} {'type': 'loss', 'content': 0.06658248603343964, 'timestamp': '2025-09-30 22:26:07.306625', 'step': 11431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:07.340354', 'step': 11431, 'epoch': 2} {'type': 'loss', 'content': 0.10752774029970169, 'timestamp': '2025-09-30 22:26:07.370335', 'step': 11432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:07.405691', 'step': 11432, 'epoch': 2} {'type': 'loss', 'content': 0.13906824588775635, 'timestamp': '2025-09-30 22:26:07.408930', 'step': 11433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:07.440553', 'step': 11433, 'epoch': 2} {'type': 'loss', 'content': 0.11116492748260498, 'timestamp': '2025-09-30 22:26:07.444006', 'step': 11434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:07.475343', 'step': 11434, 'epoch': 2} {'type': 'loss', 'content': 0.10908622294664383, 'timestamp': '2025-09-30 22:26:07.481800', 'step': 11435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:07.512536', 'step': 11435, 'epoch': 2} {'type': 'loss', 'content': 0.08454188704490662, 'timestamp': '2025-09-30 22:26:07.541997', 'step': 11436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:07.573180', 'step': 11436, 'epoch': 2} {'type': 'loss', 'content': 0.12638118863105774, 'timestamp': '2025-09-30 22:26:07.576726', 'step': 11437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:07.608073', 'step': 11437, 'epoch': 2} {'type': 'loss', 'content': 0.10106005519628525, 'timestamp': '2025-09-30 22:26:07.618820', 'step': 11438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:07.655840', 'step': 11438, 'epoch': 2} {'type': 'loss', 'content': 0.17423880100250244, 'timestamp': '2025-09-30 22:26:07.660885', 'step': 11439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:07.708409', 'step': 11439, 'epoch': 2} {'type': 'loss', 'content': 0.09529273957014084, 'timestamp': '2025-09-30 22:26:07.734258', 'step': 11440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:07.765677', 'step': 11440, 'epoch': 2} {'type': 'loss', 'content': 0.07334662973880768, 'timestamp': '2025-09-30 22:26:07.769155', 'step': 11441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:26:07.802906', 'step': 11441, 'epoch': 2} {'type': 'loss', 'content': 0.10850928723812103, 'timestamp': '2025-09-30 22:26:07.808868', 'step': 11442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:07.840993', 'step': 11442, 'epoch': 2} {'type': 'loss', 'content': 0.0872264951467514, 'timestamp': '2025-09-30 22:26:07.845445', 'step': 11443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:07.879659', 'step': 11443, 'epoch': 2} {'type': 'loss', 'content': 0.05927591770887375, 'timestamp': '2025-09-30 22:26:07.907699', 'step': 11444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:07.940336', 'step': 11444, 'epoch': 2} {'type': 'loss', 'content': 0.1046263575553894, 'timestamp': '2025-09-30 22:26:07.945239', 'step': 11445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:07.976902', 'step': 11445, 'epoch': 2} {'type': 'loss', 'content': 0.05545692518353462, 'timestamp': '2025-09-30 22:26:07.984367', 'step': 11446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:08.016894', 'step': 11446, 'epoch': 2} {'type': 'loss', 'content': 0.09363555163145065, 'timestamp': '2025-09-30 22:26:08.025896', 'step': 11447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.063770', 'step': 11447, 'epoch': 2} {'type': 'loss', 'content': 0.06143581122159958, 'timestamp': '2025-09-30 22:26:08.094682', 'step': 11448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:08.125683', 'step': 11448, 'epoch': 2} {'type': 'loss', 'content': 0.09063828736543655, 'timestamp': '2025-09-30 22:26:08.129246', 'step': 11449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:26:08.172427', 'step': 11449, 'epoch': 2} {'type': 'loss', 'content': 0.08670009672641754, 'timestamp': '2025-09-30 22:26:08.181685', 'step': 11450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.213456', 'step': 11450, 'epoch': 2} {'type': 'loss', 'content': 0.1118638813495636, 'timestamp': '2025-09-30 22:26:08.217461', 'step': 11451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:08.249262', 'step': 11451, 'epoch': 2} {'type': 'loss', 'content': 0.05389033630490303, 'timestamp': '2025-09-30 22:26:08.276505', 'step': 11452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.309752', 'step': 11452, 'epoch': 2} {'type': 'loss', 'content': 0.05714770406484604, 'timestamp': '2025-09-30 22:26:08.313142', 'step': 11453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:08.350762', 'step': 11453, 'epoch': 2} {'type': 'loss', 'content': 0.06905084103345871, 'timestamp': '2025-09-30 22:26:08.353997', 'step': 11454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:08.389190', 'step': 11454, 'epoch': 2} {'type': 'loss', 'content': 0.04254130274057388, 'timestamp': '2025-09-30 22:26:08.392013', 'step': 11455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:08.422713', 'step': 11455, 'epoch': 2} {'type': 'loss', 'content': 0.12625350058078766, 'timestamp': '2025-09-30 22:26:08.446881', 'step': 11456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.479988', 'step': 11456, 'epoch': 2} {'type': 'loss', 'content': 0.06657639145851135, 'timestamp': '2025-09-30 22:26:08.484221', 'step': 11457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:08.514302', 'step': 11457, 'epoch': 2} {'type': 'loss', 'content': 0.189825639128685, 'timestamp': '2025-09-30 22:26:08.517861', 'step': 11458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:08.551095', 'step': 11458, 'epoch': 2} {'type': 'loss', 'content': 0.11604204773902893, 'timestamp': '2025-09-30 22:26:08.556725', 'step': 11459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:08.587656', 'step': 11459, 'epoch': 2} {'type': 'loss', 'content': 0.14156749844551086, 'timestamp': '2025-09-30 22:26:08.613829', 'step': 11460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:08.645104', 'step': 11460, 'epoch': 2} {'type': 'loss', 'content': 0.10368072986602783, 'timestamp': '2025-09-30 22:26:08.651502', 'step': 11461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.684282', 'step': 11461, 'epoch': 2} {'type': 'loss', 'content': 0.059316422790288925, 'timestamp': '2025-09-30 22:26:08.689005', 'step': 11462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:08.719433', 'step': 11462, 'epoch': 2} {'type': 'loss', 'content': 0.09627800434827805, 'timestamp': '2025-09-30 22:26:08.723504', 'step': 11463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:08.754914', 'step': 11463, 'epoch': 2} {'type': 'loss', 'content': 0.08797930181026459, 'timestamp': '2025-09-30 22:26:08.779613', 'step': 11464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:08.810055', 'step': 11464, 'epoch': 2} {'type': 'loss', 'content': 0.2020048052072525, 'timestamp': '2025-09-30 22:26:08.813075', 'step': 11465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:08.843676', 'step': 11465, 'epoch': 2} {'type': 'loss', 'content': 0.19968049228191376, 'timestamp': '2025-09-30 22:26:08.850787', 'step': 11466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.883041', 'step': 11466, 'epoch': 2} {'type': 'loss', 'content': 0.12495573610067368, 'timestamp': '2025-09-30 22:26:08.892826', 'step': 11467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:08.928906', 'step': 11467, 'epoch': 2} {'type': 'loss', 'content': 0.09558307379484177, 'timestamp': '2025-09-30 22:26:08.958435', 'step': 11468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:08.995315', 'step': 11468, 'epoch': 2} {'type': 'loss', 'content': 0.17534762620925903, 'timestamp': '2025-09-30 22:26:08.998526', 'step': 11469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.029384', 'step': 11469, 'epoch': 2} {'type': 'loss', 'content': 0.09368886798620224, 'timestamp': '2025-09-30 22:26:09.032121', 'step': 11470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:09.064096', 'step': 11470, 'epoch': 2} {'type': 'loss', 'content': 0.11343677341938019, 'timestamp': '2025-09-30 22:26:09.066936', 'step': 11471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:09.097258', 'step': 11471, 'epoch': 2} {'type': 'loss', 'content': 0.09377525001764297, 'timestamp': '2025-09-30 22:26:09.130162', 'step': 11472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:09.163597', 'step': 11472, 'epoch': 2} {'type': 'loss', 'content': 0.19423584640026093, 'timestamp': '2025-09-30 22:26:09.171408', 'step': 11473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.203457', 'step': 11473, 'epoch': 2} {'type': 'loss', 'content': 0.07442519813776016, 'timestamp': '2025-09-30 22:26:09.209754', 'step': 11474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:09.242907', 'step': 11474, 'epoch': 2} {'type': 'loss', 'content': 0.09931798279285431, 'timestamp': '2025-09-30 22:26:09.248155', 'step': 11475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:09.283466', 'step': 11475, 'epoch': 2} {'type': 'loss', 'content': 0.12490146607160568, 'timestamp': '2025-09-30 22:26:09.307942', 'step': 11476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.338778', 'step': 11476, 'epoch': 2} {'type': 'loss', 'content': 0.06714818626642227, 'timestamp': '2025-09-30 22:26:09.343176', 'step': 11477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.374770', 'step': 11477, 'epoch': 2} {'type': 'loss', 'content': 0.13915517926216125, 'timestamp': '2025-09-30 22:26:09.380243', 'step': 11478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.413503', 'step': 11478, 'epoch': 2} {'type': 'loss', 'content': 0.12076731026172638, 'timestamp': '2025-09-30 22:26:09.418511', 'step': 11479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.462612', 'step': 11479, 'epoch': 2} {'type': 'loss', 'content': 0.06390862911939621, 'timestamp': '2025-09-30 22:26:09.488680', 'step': 11480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:09.521792', 'step': 11480, 'epoch': 2} {'type': 'loss', 'content': 0.09825065732002258, 'timestamp': '2025-09-30 22:26:09.528769', 'step': 11481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.560117', 'step': 11481, 'epoch': 2} {'type': 'loss', 'content': 0.23144344985485077, 'timestamp': '2025-09-30 22:26:09.563635', 'step': 11482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.595800', 'step': 11482, 'epoch': 2} {'type': 'loss', 'content': 0.10942316800355911, 'timestamp': '2025-09-30 22:26:09.603508', 'step': 11483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.638377', 'step': 11483, 'epoch': 2} {'type': 'loss', 'content': 0.11486612260341644, 'timestamp': '2025-09-30 22:26:09.665387', 'step': 11484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.701222', 'step': 11484, 'epoch': 2} {'type': 'loss', 'content': 0.03831062465906143, 'timestamp': '2025-09-30 22:26:09.707372', 'step': 11485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:09.740493', 'step': 11485, 'epoch': 2} {'type': 'loss', 'content': 0.06373761594295502, 'timestamp': '2025-09-30 22:26:09.745123', 'step': 11486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.779787', 'step': 11486, 'epoch': 2} {'type': 'loss', 'content': 0.07471320778131485, 'timestamp': '2025-09-30 22:26:09.785568', 'step': 11487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:09.818800', 'step': 11487, 'epoch': 2} {'type': 'loss', 'content': 0.07294552028179169, 'timestamp': '2025-09-30 22:26:09.846834', 'step': 11488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:09.877899', 'step': 11488, 'epoch': 2} {'type': 'loss', 'content': 0.1492919772863388, 'timestamp': '2025-09-30 22:26:09.882556', 'step': 11489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.913329', 'step': 11489, 'epoch': 2} {'type': 'loss', 'content': 0.09253688901662827, 'timestamp': '2025-09-30 22:26:09.918559', 'step': 11490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:09.952088', 'step': 11490, 'epoch': 2} {'type': 'loss', 'content': 0.10276883840560913, 'timestamp': '2025-09-30 22:26:09.957249', 'step': 11491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:09.988029', 'step': 11491, 'epoch': 2} {'type': 'loss', 'content': 0.10757552832365036, 'timestamp': '2025-09-30 22:26:10.013804', 'step': 11492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:10.045944', 'step': 11492, 'epoch': 2} {'type': 'loss', 'content': 0.12270037084817886, 'timestamp': '2025-09-30 22:26:10.056456', 'step': 11493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:10.089154', 'step': 11493, 'epoch': 2} {'type': 'loss', 'content': 0.15013889968395233, 'timestamp': '2025-09-30 22:26:10.092825', 'step': 11494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:10.124714', 'step': 11494, 'epoch': 2} {'type': 'loss', 'content': 0.09066323935985565, 'timestamp': '2025-09-30 22:26:10.129823', 'step': 11495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:10.161615', 'step': 11495, 'epoch': 2} {'type': 'loss', 'content': 0.1348034143447876, 'timestamp': '2025-09-30 22:26:10.186598', 'step': 11496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:10.219180', 'step': 11496, 'epoch': 2} {'type': 'loss', 'content': 0.10882771015167236, 'timestamp': '2025-09-30 22:26:10.222350', 'step': 11497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:10.254996', 'step': 11497, 'epoch': 2} {'type': 'loss', 'content': 0.06704007089138031, 'timestamp': '2025-09-30 22:26:10.259721', 'step': 11498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:10.292981', 'step': 11498, 'epoch': 2} {'type': 'loss', 'content': 0.1528395712375641, 'timestamp': '2025-09-30 22:26:10.299830', 'step': 11499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:10.333069', 'step': 11499, 'epoch': 2} {'type': 'loss', 'content': 0.051465652883052826, 'timestamp': '2025-09-30 22:26:10.358790', 'step': 11500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 11500', 'timestamp': '2025-09-30 22:26:15.414590', 'step': 11500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.448966', 'step': 11500, 'epoch': 2} {'type': 'loss', 'content': 0.13604463636875153, 'timestamp': '2025-09-30 22:26:15.451481', 'step': 11501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.486100', 'step': 11501, 'epoch': 2} {'type': 'loss', 'content': 0.11418726295232773, 'timestamp': '2025-09-30 22:26:15.489348', 'step': 11502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.526753', 'step': 11502, 'epoch': 2} {'type': 'loss', 'content': 0.16615542769432068, 'timestamp': '2025-09-30 22:26:15.529877', 'step': 11503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:15.570959', 'step': 11503, 'epoch': 2} {'type': 'loss', 'content': 0.1044730618596077, 'timestamp': '2025-09-30 22:26:15.595579', 'step': 11504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:15.627805', 'step': 11504, 'epoch': 2} {'type': 'loss', 'content': 0.09704738110303879, 'timestamp': '2025-09-30 22:26:15.635473', 'step': 11505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.671073', 'step': 11505, 'epoch': 2} {'type': 'loss', 'content': 0.04358460381627083, 'timestamp': '2025-09-30 22:26:15.676938', 'step': 11506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:15.708766', 'step': 11506, 'epoch': 2} {'type': 'loss', 'content': 0.06900802254676819, 'timestamp': '2025-09-30 22:26:15.715645', 'step': 11507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:15.752293', 'step': 11507, 'epoch': 2} {'type': 'loss', 'content': 0.13212305307388306, 'timestamp': '2025-09-30 22:26:15.777534', 'step': 11508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.808887', 'step': 11508, 'epoch': 2} {'type': 'loss', 'content': 0.08043908327817917, 'timestamp': '2025-09-30 22:26:15.814021', 'step': 11509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.856192', 'step': 11509, 'epoch': 2} {'type': 'loss', 'content': 0.19118261337280273, 'timestamp': '2025-09-30 22:26:15.860915', 'step': 11510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:15.893164', 'step': 11510, 'epoch': 2} {'type': 'loss', 'content': 0.04857068508863449, 'timestamp': '2025-09-30 22:26:15.895586', 'step': 11511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:15.927033', 'step': 11511, 'epoch': 2} {'type': 'loss', 'content': 0.11732065677642822, 'timestamp': '2025-09-30 22:26:15.951169', 'step': 11512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:15.981465', 'step': 11512, 'epoch': 2} {'type': 'loss', 'content': 0.04366537928581238, 'timestamp': '2025-09-30 22:26:15.983937', 'step': 11513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.015037', 'step': 11513, 'epoch': 2} {'type': 'loss', 'content': 0.131494402885437, 'timestamp': '2025-09-30 22:26:16.019283', 'step': 11514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:16.054920', 'step': 11514, 'epoch': 2} {'type': 'loss', 'content': 0.11261057108640671, 'timestamp': '2025-09-30 22:26:16.059452', 'step': 11515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.093522', 'step': 11515, 'epoch': 2} {'type': 'loss', 'content': 0.08375945687294006, 'timestamp': '2025-09-30 22:26:16.119946', 'step': 11516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.153255', 'step': 11516, 'epoch': 2} {'type': 'loss', 'content': 0.13708604872226715, 'timestamp': '2025-09-30 22:26:16.156420', 'step': 11517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.187024', 'step': 11517, 'epoch': 2} {'type': 'loss', 'content': 0.10433299094438553, 'timestamp': '2025-09-30 22:26:16.189881', 'step': 11518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:16.220366', 'step': 11518, 'epoch': 2} {'type': 'loss', 'content': 0.14031970500946045, 'timestamp': '2025-09-30 22:26:16.225470', 'step': 11519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.255771', 'step': 11519, 'epoch': 2} {'type': 'loss', 'content': 0.1396583765745163, 'timestamp': '2025-09-30 22:26:16.283040', 'step': 11520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:16.323062', 'step': 11520, 'epoch': 2} {'type': 'loss', 'content': 0.1464957594871521, 'timestamp': '2025-09-30 22:26:16.328892', 'step': 11521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:16.361008', 'step': 11521, 'epoch': 2} {'type': 'loss', 'content': 0.14855358004570007, 'timestamp': '2025-09-30 22:26:16.364028', 'step': 11522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.394543', 'step': 11522, 'epoch': 2} {'type': 'loss', 'content': 0.11508379876613617, 'timestamp': '2025-09-30 22:26:16.397334', 'step': 11523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:16.428041', 'step': 11523, 'epoch': 2} {'type': 'loss', 'content': 0.14398185908794403, 'timestamp': '2025-09-30 22:26:16.455128', 'step': 11524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.495275', 'step': 11524, 'epoch': 2} {'type': 'loss', 'content': 0.08327274024486542, 'timestamp': '2025-09-30 22:26:16.503722', 'step': 11525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.536344', 'step': 11525, 'epoch': 2} {'type': 'loss', 'content': 0.10159654915332794, 'timestamp': '2025-09-30 22:26:16.542011', 'step': 11526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:16.583608', 'step': 11526, 'epoch': 2} {'type': 'loss', 'content': 0.08240877091884613, 'timestamp': '2025-09-30 22:26:16.586778', 'step': 11527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.622150', 'step': 11527, 'epoch': 2} {'type': 'loss', 'content': 0.0753621906042099, 'timestamp': '2025-09-30 22:26:16.646172', 'step': 11528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:16.677533', 'step': 11528, 'epoch': 2} {'type': 'loss', 'content': 0.12675583362579346, 'timestamp': '2025-09-30 22:26:16.680574', 'step': 11529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:16.723149', 'step': 11529, 'epoch': 2} {'type': 'loss', 'content': 0.06082049757242203, 'timestamp': '2025-09-30 22:26:16.730448', 'step': 11530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.762250', 'step': 11530, 'epoch': 2} {'type': 'loss', 'content': 0.07266740500926971, 'timestamp': '2025-09-30 22:26:16.765242', 'step': 11531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.796087', 'step': 11531, 'epoch': 2} {'type': 'loss', 'content': 0.10237003862857819, 'timestamp': '2025-09-30 22:26:16.821225', 'step': 11532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:16.851720', 'step': 11532, 'epoch': 2} {'type': 'loss', 'content': 0.09882984310388565, 'timestamp': '2025-09-30 22:26:16.855270', 'step': 11533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:16.894757', 'step': 11533, 'epoch': 2} {'type': 'loss', 'content': 0.04768333584070206, 'timestamp': '2025-09-30 22:26:16.898562', 'step': 11534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:16.930929', 'step': 11534, 'epoch': 2} {'type': 'loss', 'content': 0.11765310168266296, 'timestamp': '2025-09-30 22:26:16.934406', 'step': 11535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:16.965041', 'step': 11535, 'epoch': 2} {'type': 'loss', 'content': 0.12479783594608307, 'timestamp': '2025-09-30 22:26:16.989636', 'step': 11536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:17.020776', 'step': 11536, 'epoch': 2} {'type': 'loss', 'content': 0.12441378831863403, 'timestamp': '2025-09-30 22:26:17.023245', 'step': 11537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.056820', 'step': 11537, 'epoch': 2} {'type': 'loss', 'content': 0.08751562237739563, 'timestamp': '2025-09-30 22:26:17.059373', 'step': 11538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.089513', 'step': 11538, 'epoch': 2} {'type': 'loss', 'content': 0.04798236861824989, 'timestamp': '2025-09-30 22:26:17.092768', 'step': 11539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.124531', 'step': 11539, 'epoch': 2} {'type': 'loss', 'content': 0.16702134907245636, 'timestamp': '2025-09-30 22:26:17.148973', 'step': 11540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.180021', 'step': 11540, 'epoch': 2} {'type': 'loss', 'content': 0.08952119201421738, 'timestamp': '2025-09-30 22:26:17.184042', 'step': 11541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.220307', 'step': 11541, 'epoch': 2} {'type': 'loss', 'content': 0.12445078045129776, 'timestamp': '2025-09-30 22:26:17.227365', 'step': 11542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.258875', 'step': 11542, 'epoch': 2} {'type': 'loss', 'content': 0.08289400488138199, 'timestamp': '2025-09-30 22:26:17.261548', 'step': 11543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:17.293661', 'step': 11543, 'epoch': 2} {'type': 'loss', 'content': 0.09171351045370102, 'timestamp': '2025-09-30 22:26:17.319826', 'step': 11544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:17.349801', 'step': 11544, 'epoch': 2} {'type': 'loss', 'content': 0.13315704464912415, 'timestamp': '2025-09-30 22:26:17.352912', 'step': 11545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.385061', 'step': 11545, 'epoch': 2} {'type': 'loss', 'content': 0.09824567288160324, 'timestamp': '2025-09-30 22:26:17.388784', 'step': 11546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:17.427834', 'step': 11546, 'epoch': 2} {'type': 'loss', 'content': 0.14174574613571167, 'timestamp': '2025-09-30 22:26:17.434568', 'step': 11547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:17.468115', 'step': 11547, 'epoch': 2} {'type': 'loss', 'content': 0.11779898405075073, 'timestamp': '2025-09-30 22:26:17.494524', 'step': 11548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.534567', 'step': 11548, 'epoch': 2} {'type': 'loss', 'content': 0.06725781410932541, 'timestamp': '2025-09-30 22:26:17.539497', 'step': 11549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:17.587819', 'step': 11549, 'epoch': 2} {'type': 'loss', 'content': 0.03421574458479881, 'timestamp': '2025-09-30 22:26:17.596089', 'step': 11550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:17.650862', 'step': 11550, 'epoch': 2} {'type': 'loss', 'content': 0.055606551468372345, 'timestamp': '2025-09-30 22:26:17.655629', 'step': 11551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:17.706437', 'step': 11551, 'epoch': 2} {'type': 'loss', 'content': 0.06408347934484482, 'timestamp': '2025-09-30 22:26:17.732823', 'step': 11552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:17.788343', 'step': 11552, 'epoch': 2} {'type': 'loss', 'content': 0.08525198698043823, 'timestamp': '2025-09-30 22:26:17.792164', 'step': 11553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.856929', 'step': 11553, 'epoch': 2} {'type': 'loss', 'content': 0.06222482770681381, 'timestamp': '2025-09-30 22:26:17.864361', 'step': 11554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:17.945437', 'step': 11554, 'epoch': 2} {'type': 'loss', 'content': 0.09631775319576263, 'timestamp': '2025-09-30 22:26:17.947828', 'step': 11555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:17.996639', 'step': 11555, 'epoch': 2} {'type': 'loss', 'content': 0.15088917315006256, 'timestamp': '2025-09-30 22:26:18.022148', 'step': 11556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:18.076091', 'step': 11556, 'epoch': 2} {'type': 'loss', 'content': 0.08871679753065109, 'timestamp': '2025-09-30 22:26:18.080774', 'step': 11557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:18.151898', 'step': 11557, 'epoch': 2} {'type': 'loss', 'content': 0.1164465844631195, 'timestamp': '2025-09-30 22:26:18.157664', 'step': 11558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:18.211617', 'step': 11558, 'epoch': 2} {'type': 'loss', 'content': 0.14943525195121765, 'timestamp': '2025-09-30 22:26:18.218344', 'step': 11559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:18.262224', 'step': 11559, 'epoch': 2} {'type': 'loss', 'content': 0.07697094231843948, 'timestamp': '2025-09-30 22:26:18.288391', 'step': 11560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:18.337896', 'step': 11560, 'epoch': 2} {'type': 'loss', 'content': 0.06834975630044937, 'timestamp': '2025-09-30 22:26:18.342597', 'step': 11561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:18.397284', 'step': 11561, 'epoch': 2} {'type': 'loss', 'content': 0.09749449044466019, 'timestamp': '2025-09-30 22:26:18.400340', 'step': 11562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:18.461729', 'step': 11562, 'epoch': 2} {'type': 'loss', 'content': 0.129963681101799, 'timestamp': '2025-09-30 22:26:18.474775', 'step': 11563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:18.555266', 'step': 11563, 'epoch': 2} {'type': 'loss', 'content': 0.09813571721315384, 'timestamp': '2025-09-30 22:26:18.580400', 'step': 11564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:18.643949', 'step': 11564, 'epoch': 2} {'type': 'loss', 'content': 0.059181828051805496, 'timestamp': '2025-09-30 22:26:18.649956', 'step': 11565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:18.710162', 'step': 11565, 'epoch': 2} {'type': 'loss', 'content': 0.13922694325447083, 'timestamp': '2025-09-30 22:26:18.716646', 'step': 11566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:18.785515', 'step': 11566, 'epoch': 2} {'type': 'loss', 'content': 0.15620501339435577, 'timestamp': '2025-09-30 22:26:18.790848', 'step': 11567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:18.837776', 'step': 11567, 'epoch': 2} {'type': 'loss', 'content': 0.08005710691213608, 'timestamp': '2025-09-30 22:26:18.862423', 'step': 11568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:18.908269', 'step': 11568, 'epoch': 2} {'type': 'loss', 'content': 0.09165163338184357, 'timestamp': '2025-09-30 22:26:18.914294', 'step': 11569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:18.946534', 'step': 11569, 'epoch': 2} {'type': 'loss', 'content': 0.10080582648515701, 'timestamp': '2025-09-30 22:26:18.951408', 'step': 11570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:18.984265', 'step': 11570, 'epoch': 2} {'type': 'loss', 'content': 0.20384581387043, 'timestamp': '2025-09-30 22:26:18.988276', 'step': 11571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:19.021330', 'step': 11571, 'epoch': 2} {'type': 'loss', 'content': 0.06646237522363663, 'timestamp': '2025-09-30 22:26:19.049019', 'step': 11572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.080175', 'step': 11572, 'epoch': 2} {'type': 'loss', 'content': 0.07004596292972565, 'timestamp': '2025-09-30 22:26:19.088732', 'step': 11573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:19.121920', 'step': 11573, 'epoch': 2} {'type': 'loss', 'content': 0.08365073800086975, 'timestamp': '2025-09-30 22:26:19.134861', 'step': 11574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:19.176229', 'step': 11574, 'epoch': 2} {'type': 'loss', 'content': 0.14395473897457123, 'timestamp': '2025-09-30 22:26:19.178706', 'step': 11575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:19.211308', 'step': 11575, 'epoch': 2} {'type': 'loss', 'content': 0.029804691672325134, 'timestamp': '2025-09-30 22:26:19.236091', 'step': 11576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:19.267339', 'step': 11576, 'epoch': 2} {'type': 'loss', 'content': 0.12726019322872162, 'timestamp': '2025-09-30 22:26:19.270869', 'step': 11577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:19.307813', 'step': 11577, 'epoch': 2} {'type': 'loss', 'content': 0.0531395860016346, 'timestamp': '2025-09-30 22:26:19.311949', 'step': 11578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.347919', 'step': 11578, 'epoch': 2} {'type': 'loss', 'content': 0.15791887044906616, 'timestamp': '2025-09-30 22:26:19.351246', 'step': 11579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.384955', 'step': 11579, 'epoch': 2} {'type': 'loss', 'content': 0.044324588030576706, 'timestamp': '2025-09-30 22:26:19.418630', 'step': 11580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.460641', 'step': 11580, 'epoch': 2} {'type': 'loss', 'content': 0.15439504384994507, 'timestamp': '2025-09-30 22:26:19.465258', 'step': 11581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:19.504497', 'step': 11581, 'epoch': 2} {'type': 'loss', 'content': 0.08342589437961578, 'timestamp': '2025-09-30 22:26:19.510918', 'step': 11582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.548049', 'step': 11582, 'epoch': 2} {'type': 'loss', 'content': 0.09263020753860474, 'timestamp': '2025-09-30 22:26:19.552958', 'step': 11583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.584637', 'step': 11583, 'epoch': 2} {'type': 'loss', 'content': 0.12383797019720078, 'timestamp': '2025-09-30 22:26:19.612052', 'step': 11584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:19.644084', 'step': 11584, 'epoch': 2} {'type': 'loss', 'content': 0.13767682015895844, 'timestamp': '2025-09-30 22:26:19.647137', 'step': 11585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.679921', 'step': 11585, 'epoch': 2} {'type': 'loss', 'content': 0.1094997227191925, 'timestamp': '2025-09-30 22:26:19.682627', 'step': 11586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:19.715330', 'step': 11586, 'epoch': 2} {'type': 'loss', 'content': 0.08479809015989304, 'timestamp': '2025-09-30 22:26:19.718504', 'step': 11587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:19.749599', 'step': 11587, 'epoch': 2} {'type': 'loss', 'content': 0.05388927459716797, 'timestamp': '2025-09-30 22:26:19.775381', 'step': 11588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:19.814670', 'step': 11588, 'epoch': 2} {'type': 'loss', 'content': 0.03689589723944664, 'timestamp': '2025-09-30 22:26:19.826745', 'step': 11589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:19.863899', 'step': 11589, 'epoch': 2} {'type': 'loss', 'content': 0.13731229305267334, 'timestamp': '2025-09-30 22:26:19.874247', 'step': 11590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:19.911264', 'step': 11590, 'epoch': 2} {'type': 'loss', 'content': 0.12455146759748459, 'timestamp': '2025-09-30 22:26:19.925460', 'step': 11591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:19.958164', 'step': 11591, 'epoch': 2} {'type': 'loss', 'content': 0.08757627755403519, 'timestamp': '2025-09-30 22:26:19.982825', 'step': 11592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:20.016135', 'step': 11592, 'epoch': 2} {'type': 'loss', 'content': 0.13636814057826996, 'timestamp': '2025-09-30 22:26:20.019298', 'step': 11593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:20.069994', 'step': 11593, 'epoch': 2} {'type': 'loss', 'content': 0.17579889297485352, 'timestamp': '2025-09-30 22:26:20.072797', 'step': 11594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.105276', 'step': 11594, 'epoch': 2} {'type': 'loss', 'content': 0.07842059433460236, 'timestamp': '2025-09-30 22:26:20.108961', 'step': 11595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.142476', 'step': 11595, 'epoch': 2} {'type': 'loss', 'content': 0.15550518035888672, 'timestamp': '2025-09-30 22:26:20.167940', 'step': 11596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.200870', 'step': 11596, 'epoch': 2} {'type': 'loss', 'content': 0.0974380224943161, 'timestamp': '2025-09-30 22:26:20.204026', 'step': 11597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:20.234354', 'step': 11597, 'epoch': 2} {'type': 'loss', 'content': 0.1403948962688446, 'timestamp': '2025-09-30 22:26:20.240915', 'step': 11598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:20.273495', 'step': 11598, 'epoch': 2} {'type': 'loss', 'content': 0.14982372522354126, 'timestamp': '2025-09-30 22:26:20.276660', 'step': 11599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.308021', 'step': 11599, 'epoch': 2} {'type': 'loss', 'content': 0.06297582387924194, 'timestamp': '2025-09-30 22:26:20.335014', 'step': 11600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:20.375282', 'step': 11600, 'epoch': 2} {'type': 'loss', 'content': 0.03441569209098816, 'timestamp': '2025-09-30 22:26:20.379543', 'step': 11601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.414592', 'step': 11601, 'epoch': 2} {'type': 'loss', 'content': 0.04832754656672478, 'timestamp': '2025-09-30 22:26:20.417951', 'step': 11602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:20.449301', 'step': 11602, 'epoch': 2} {'type': 'loss', 'content': 0.08664489537477493, 'timestamp': '2025-09-30 22:26:20.452042', 'step': 11603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.483531', 'step': 11603, 'epoch': 2} {'type': 'loss', 'content': 0.12986606359481812, 'timestamp': '2025-09-30 22:26:20.509313', 'step': 11604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:20.540963', 'step': 11604, 'epoch': 2} {'type': 'loss', 'content': 0.0799543559551239, 'timestamp': '2025-09-30 22:26:20.544086', 'step': 11605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.575480', 'step': 11605, 'epoch': 2} {'type': 'loss', 'content': 0.10502313077449799, 'timestamp': '2025-09-30 22:26:20.581164', 'step': 11606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:20.612699', 'step': 11606, 'epoch': 2} {'type': 'loss', 'content': 0.14249005913734436, 'timestamp': '2025-09-30 22:26:20.617505', 'step': 11607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.648597', 'step': 11607, 'epoch': 2} {'type': 'loss', 'content': 0.07907267659902573, 'timestamp': '2025-09-30 22:26:20.673057', 'step': 11608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:20.705168', 'step': 11608, 'epoch': 2} {'type': 'loss', 'content': 0.08032406866550446, 'timestamp': '2025-09-30 22:26:20.718427', 'step': 11609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:20.749834', 'step': 11609, 'epoch': 2} {'type': 'loss', 'content': 0.09420450031757355, 'timestamp': '2025-09-30 22:26:20.752638', 'step': 11610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:20.784164', 'step': 11610, 'epoch': 2} {'type': 'loss', 'content': 0.11971496790647507, 'timestamp': '2025-09-30 22:26:20.786927', 'step': 11611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:20.818361', 'step': 11611, 'epoch': 2} {'type': 'loss', 'content': 0.0771126076579094, 'timestamp': '2025-09-30 22:26:20.844939', 'step': 11612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:20.883341', 'step': 11612, 'epoch': 2} {'type': 'loss', 'content': 0.1158846989274025, 'timestamp': '2025-09-30 22:26:20.894123', 'step': 11613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:20.926375', 'step': 11613, 'epoch': 2} {'type': 'loss', 'content': 0.11313421279191971, 'timestamp': '2025-09-30 22:26:20.931536', 'step': 11614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:20.964050', 'step': 11614, 'epoch': 2} {'type': 'loss', 'content': 0.03228474035859108, 'timestamp': '2025-09-30 22:26:20.968120', 'step': 11615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.000353', 'step': 11615, 'epoch': 2} {'type': 'loss', 'content': 0.08322980999946594, 'timestamp': '2025-09-30 22:26:21.031441', 'step': 11616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:21.069897', 'step': 11616, 'epoch': 2} {'type': 'loss', 'content': 0.1573551893234253, 'timestamp': '2025-09-30 22:26:21.073870', 'step': 11617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:21.106175', 'step': 11617, 'epoch': 2} {'type': 'loss', 'content': 0.09717357903718948, 'timestamp': '2025-09-30 22:26:21.117006', 'step': 11618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.149649', 'step': 11618, 'epoch': 2} {'type': 'loss', 'content': 0.11635567247867584, 'timestamp': '2025-09-30 22:26:21.154384', 'step': 11619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.194834', 'step': 11619, 'epoch': 2} {'type': 'loss', 'content': 0.14207451045513153, 'timestamp': '2025-09-30 22:26:21.224585', 'step': 11620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.267578', 'step': 11620, 'epoch': 2} {'type': 'loss', 'content': 0.07336035370826721, 'timestamp': '2025-09-30 22:26:21.271933', 'step': 11621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:21.304282', 'step': 11621, 'epoch': 2} {'type': 'loss', 'content': 0.1498183012008667, 'timestamp': '2025-09-30 22:26:21.320536', 'step': 11622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.360157', 'step': 11622, 'epoch': 2} {'type': 'loss', 'content': 0.17460080981254578, 'timestamp': '2025-09-30 22:26:21.365227', 'step': 11623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:21.395452', 'step': 11623, 'epoch': 2} {'type': 'loss', 'content': 0.029673447832465172, 'timestamp': '2025-09-30 22:26:21.420341', 'step': 11624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.458514', 'step': 11624, 'epoch': 2} {'type': 'loss', 'content': 0.10753069818019867, 'timestamp': '2025-09-30 22:26:21.467310', 'step': 11625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.498714', 'step': 11625, 'epoch': 2} {'type': 'loss', 'content': 0.11461781710386276, 'timestamp': '2025-09-30 22:26:21.502539', 'step': 11626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.543365', 'step': 11626, 'epoch': 2} {'type': 'loss', 'content': 0.11478759348392487, 'timestamp': '2025-09-30 22:26:21.547780', 'step': 11627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.578620', 'step': 11627, 'epoch': 2} {'type': 'loss', 'content': 0.04859267175197601, 'timestamp': '2025-09-30 22:26:21.608147', 'step': 11628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:21.638159', 'step': 11628, 'epoch': 2} {'type': 'loss', 'content': 0.05959958955645561, 'timestamp': '2025-09-30 22:26:21.642265', 'step': 11629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:21.676763', 'step': 11629, 'epoch': 2} {'type': 'loss', 'content': 0.08322230726480484, 'timestamp': '2025-09-30 22:26:21.687388', 'step': 11630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:21.742948', 'step': 11630, 'epoch': 2} {'type': 'loss', 'content': 0.03257203474640846, 'timestamp': '2025-09-30 22:26:21.746022', 'step': 11631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:21.776633', 'step': 11631, 'epoch': 2} {'type': 'loss', 'content': 0.12880447506904602, 'timestamp': '2025-09-30 22:26:21.804482', 'step': 11632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:21.838804', 'step': 11632, 'epoch': 2} {'type': 'loss', 'content': 0.09868602454662323, 'timestamp': '2025-09-30 22:26:21.845627', 'step': 11633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:21.887185', 'step': 11633, 'epoch': 2} {'type': 'loss', 'content': 0.05235728248953819, 'timestamp': '2025-09-30 22:26:21.891536', 'step': 11634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:21.923433', 'step': 11634, 'epoch': 2} {'type': 'loss', 'content': 0.06785537302494049, 'timestamp': '2025-09-30 22:26:21.937774', 'step': 11635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:21.980321', 'step': 11635, 'epoch': 2} {'type': 'loss', 'content': 0.06868971884250641, 'timestamp': '2025-09-30 22:26:22.015966', 'step': 11636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.047520', 'step': 11636, 'epoch': 2} {'type': 'loss', 'content': 0.1012187972664833, 'timestamp': '2025-09-30 22:26:22.060317', 'step': 11637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.095154', 'step': 11637, 'epoch': 2} {'type': 'loss', 'content': 0.11347655206918716, 'timestamp': '2025-09-30 22:26:22.099658', 'step': 11638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:22.143277', 'step': 11638, 'epoch': 2} {'type': 'loss', 'content': 0.14500324428081512, 'timestamp': '2025-09-30 22:26:22.147049', 'step': 11639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:22.178665', 'step': 11639, 'epoch': 2} {'type': 'loss', 'content': 0.09740432351827621, 'timestamp': '2025-09-30 22:26:22.211725', 'step': 11640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.249323', 'step': 11640, 'epoch': 2} {'type': 'loss', 'content': 0.03216603770852089, 'timestamp': '2025-09-30 22:26:22.252457', 'step': 11641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.297336', 'step': 11641, 'epoch': 2} {'type': 'loss', 'content': 0.08276955783367157, 'timestamp': '2025-09-30 22:26:22.300570', 'step': 11642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.331759', 'step': 11642, 'epoch': 2} {'type': 'loss', 'content': 0.16060836613178253, 'timestamp': '2025-09-30 22:26:22.344108', 'step': 11643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:22.390440', 'step': 11643, 'epoch': 2} {'type': 'loss', 'content': 0.08694811165332794, 'timestamp': '2025-09-30 22:26:22.427305', 'step': 11644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:22.460168', 'step': 11644, 'epoch': 2} {'type': 'loss', 'content': 0.1151275560259819, 'timestamp': '2025-09-30 22:26:22.463425', 'step': 11645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:22.498009', 'step': 11645, 'epoch': 2} {'type': 'loss', 'content': 0.08548573404550552, 'timestamp': '2025-09-30 22:26:22.504136', 'step': 11646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.535210', 'step': 11646, 'epoch': 2} {'type': 'loss', 'content': 0.06793919950723648, 'timestamp': '2025-09-30 22:26:22.545458', 'step': 11647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:22.577908', 'step': 11647, 'epoch': 2} {'type': 'loss', 'content': 0.08457045257091522, 'timestamp': '2025-09-30 22:26:22.610276', 'step': 11648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:22.641981', 'step': 11648, 'epoch': 2} {'type': 'loss', 'content': 0.030834831297397614, 'timestamp': '2025-09-30 22:26:22.647432', 'step': 11649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:22.680157', 'step': 11649, 'epoch': 2} {'type': 'loss', 'content': 0.1401887685060501, 'timestamp': '2025-09-30 22:26:22.685083', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:26:31.012125', 'step': 11650, 'epoch': 2} {'type': 'pplx', 'content': 11996.148209032246, 'timestamp': '2025-09-30 22:26:31.019869', 'step': 11650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.068106', 'step': 11650, 'epoch': 2} {'type': 'loss', 'content': 0.06991241127252579, 'timestamp': '2025-09-30 22:26:31.071232', 'step': 11651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.102294', 'step': 11651, 'epoch': 2} {'type': 'loss', 'content': 0.12134716659784317, 'timestamp': '2025-09-30 22:26:31.127951', 'step': 11652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.159798', 'step': 11652, 'epoch': 2} {'type': 'loss', 'content': 0.10761716961860657, 'timestamp': '2025-09-30 22:26:31.165112', 'step': 11653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:31.196576', 'step': 11653, 'epoch': 2} {'type': 'loss', 'content': 0.19036084413528442, 'timestamp': '2025-09-30 22:26:31.199886', 'step': 11654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:31.231461', 'step': 11654, 'epoch': 2} {'type': 'loss', 'content': 0.08138496428728104, 'timestamp': '2025-09-30 22:26:31.234306', 'step': 11655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:31.264964', 'step': 11655, 'epoch': 2} {'type': 'loss', 'content': 0.07626178860664368, 'timestamp': '2025-09-30 22:26:31.299974', 'step': 11656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.331715', 'step': 11656, 'epoch': 2} {'type': 'loss', 'content': 0.07899738103151321, 'timestamp': '2025-09-30 22:26:31.343017', 'step': 11657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.381273', 'step': 11657, 'epoch': 2} {'type': 'loss', 'content': 0.07170227915048599, 'timestamp': '2025-09-30 22:26:31.390718', 'step': 11658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.422156', 'step': 11658, 'epoch': 2} {'type': 'loss', 'content': 0.08887317776679993, 'timestamp': '2025-09-30 22:26:31.426649', 'step': 11659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.457429', 'step': 11659, 'epoch': 2} {'type': 'loss', 'content': 0.09229566901922226, 'timestamp': '2025-09-30 22:26:31.486522', 'step': 11660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.523594', 'step': 11660, 'epoch': 2} {'type': 'loss', 'content': 0.08286308497190475, 'timestamp': '2025-09-30 22:26:31.526198', 'step': 11661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.556605', 'step': 11661, 'epoch': 2} {'type': 'loss', 'content': 0.1794651746749878, 'timestamp': '2025-09-30 22:26:31.563356', 'step': 11662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.594163', 'step': 11662, 'epoch': 2} {'type': 'loss', 'content': 0.10362857580184937, 'timestamp': '2025-09-30 22:26:31.596359', 'step': 11663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:31.630861', 'step': 11663, 'epoch': 2} {'type': 'loss', 'content': 0.1499774008989334, 'timestamp': '2025-09-30 22:26:31.655742', 'step': 11664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.690288', 'step': 11664, 'epoch': 2} {'type': 'loss', 'content': 0.050379469990730286, 'timestamp': '2025-09-30 22:26:31.693619', 'step': 11665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.724024', 'step': 11665, 'epoch': 2} {'type': 'loss', 'content': 0.13522419333457947, 'timestamp': '2025-09-30 22:26:31.734733', 'step': 11666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.766172', 'step': 11666, 'epoch': 2} {'type': 'loss', 'content': 0.07658381760120392, 'timestamp': '2025-09-30 22:26:31.779790', 'step': 11667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.819052', 'step': 11667, 'epoch': 2} {'type': 'loss', 'content': 0.07098308205604553, 'timestamp': '2025-09-30 22:26:31.843667', 'step': 11668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.874787', 'step': 11668, 'epoch': 2} {'type': 'loss', 'content': 0.14747847616672516, 'timestamp': '2025-09-30 22:26:31.883682', 'step': 11669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:31.923783', 'step': 11669, 'epoch': 2} {'type': 'loss', 'content': 0.12848331034183502, 'timestamp': '2025-09-30 22:26:31.932440', 'step': 11670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:31.963724', 'step': 11670, 'epoch': 2} {'type': 'loss', 'content': 0.05596788600087166, 'timestamp': '2025-09-30 22:26:31.971351', 'step': 11671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.010076', 'step': 11671, 'epoch': 2} {'type': 'loss', 'content': 0.2478903979063034, 'timestamp': '2025-09-30 22:26:32.036418', 'step': 11672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.075424', 'step': 11672, 'epoch': 2} {'type': 'loss', 'content': 0.06435446441173553, 'timestamp': '2025-09-30 22:26:32.078613', 'step': 11673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.108773', 'step': 11673, 'epoch': 2} {'type': 'loss', 'content': 0.06727973371744156, 'timestamp': '2025-09-30 22:26:32.119028', 'step': 11674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.154999', 'step': 11674, 'epoch': 2} {'type': 'loss', 'content': 0.14917044341564178, 'timestamp': '2025-09-30 22:26:32.158158', 'step': 11675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.188751', 'step': 11675, 'epoch': 2} {'type': 'loss', 'content': 0.14467526972293854, 'timestamp': '2025-09-30 22:26:32.214724', 'step': 11676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.253932', 'step': 11676, 'epoch': 2} {'type': 'loss', 'content': 0.12120069563388824, 'timestamp': '2025-09-30 22:26:32.258094', 'step': 11677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.289579', 'step': 11677, 'epoch': 2} {'type': 'loss', 'content': 0.11105778068304062, 'timestamp': '2025-09-30 22:26:32.300339', 'step': 11678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.332158', 'step': 11678, 'epoch': 2} {'type': 'loss', 'content': 0.11838974803686142, 'timestamp': '2025-09-30 22:26:32.336049', 'step': 11679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:32.369079', 'step': 11679, 'epoch': 2} {'type': 'loss', 'content': 0.10067763179540634, 'timestamp': '2025-09-30 22:26:32.393618', 'step': 11680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.425428', 'step': 11680, 'epoch': 2} {'type': 'loss', 'content': 0.055055342614650726, 'timestamp': '2025-09-30 22:26:32.430216', 'step': 11681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.469328', 'step': 11681, 'epoch': 2} {'type': 'loss', 'content': 0.0605444461107254, 'timestamp': '2025-09-30 22:26:32.472735', 'step': 11682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.511334', 'step': 11682, 'epoch': 2} {'type': 'loss', 'content': 0.15220148861408234, 'timestamp': '2025-09-30 22:26:32.522276', 'step': 11683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.562756', 'step': 11683, 'epoch': 2} {'type': 'loss', 'content': 0.13590408861637115, 'timestamp': '2025-09-30 22:26:32.590892', 'step': 11684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.626948', 'step': 11684, 'epoch': 2} {'type': 'loss', 'content': 0.08228567242622375, 'timestamp': '2025-09-30 22:26:32.631745', 'step': 11685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:32.675978', 'step': 11685, 'epoch': 2} {'type': 'loss', 'content': 0.1316899210214615, 'timestamp': '2025-09-30 22:26:32.680310', 'step': 11686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:32.719722', 'step': 11686, 'epoch': 2} {'type': 'loss', 'content': 0.17492547631263733, 'timestamp': '2025-09-30 22:26:32.723752', 'step': 11687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.770437', 'step': 11687, 'epoch': 2} {'type': 'loss', 'content': 0.15338477492332458, 'timestamp': '2025-09-30 22:26:32.796099', 'step': 11688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:32.829083', 'step': 11688, 'epoch': 2} {'type': 'loss', 'content': 0.055011965334415436, 'timestamp': '2025-09-30 22:26:32.834446', 'step': 11689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:32.868896', 'step': 11689, 'epoch': 2} {'type': 'loss', 'content': 0.0688057690858841, 'timestamp': '2025-09-30 22:26:32.871777', 'step': 11690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:32.906535', 'step': 11690, 'epoch': 2} {'type': 'loss', 'content': 0.06210368499159813, 'timestamp': '2025-09-30 22:26:32.910855', 'step': 11691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:32.944276', 'step': 11691, 'epoch': 2} {'type': 'loss', 'content': 0.11816521733999252, 'timestamp': '2025-09-30 22:26:32.969956', 'step': 11692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.002984', 'step': 11692, 'epoch': 2} {'type': 'loss', 'content': 0.1345883011817932, 'timestamp': '2025-09-30 22:26:33.007628', 'step': 11693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:33.047240', 'step': 11693, 'epoch': 2} {'type': 'loss', 'content': 0.1358463019132614, 'timestamp': '2025-09-30 22:26:33.062036', 'step': 11694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:33.093479', 'step': 11694, 'epoch': 2} {'type': 'loss', 'content': 0.23770716786384583, 'timestamp': '2025-09-30 22:26:33.097236', 'step': 11695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.132707', 'step': 11695, 'epoch': 2} {'type': 'loss', 'content': 0.20845471322536469, 'timestamp': '2025-09-30 22:26:33.160001', 'step': 11696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.201935', 'step': 11696, 'epoch': 2} {'type': 'loss', 'content': 0.10618903487920761, 'timestamp': '2025-09-30 22:26:33.206413', 'step': 11697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.239008', 'step': 11697, 'epoch': 2} {'type': 'loss', 'content': 0.10308753699064255, 'timestamp': '2025-09-30 22:26:33.243724', 'step': 11698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.275830', 'step': 11698, 'epoch': 2} {'type': 'loss', 'content': 0.06269132345914841, 'timestamp': '2025-09-30 22:26:33.281372', 'step': 11699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.314697', 'step': 11699, 'epoch': 2} {'type': 'loss', 'content': 0.020548783242702484, 'timestamp': '2025-09-30 22:26:33.340450', 'step': 11700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:33.372764', 'step': 11700, 'epoch': 2} {'type': 'loss', 'content': 0.0453760102391243, 'timestamp': '2025-09-30 22:26:33.377354', 'step': 11701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:33.414182', 'step': 11701, 'epoch': 2} {'type': 'loss', 'content': 0.045531921088695526, 'timestamp': '2025-09-30 22:26:33.418229', 'step': 11702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:33.449783', 'step': 11702, 'epoch': 2} {'type': 'loss', 'content': 0.15140992403030396, 'timestamp': '2025-09-30 22:26:33.453441', 'step': 11703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.485107', 'step': 11703, 'epoch': 2} {'type': 'loss', 'content': 0.1255311220884323, 'timestamp': '2025-09-30 22:26:33.510250', 'step': 11704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.542436', 'step': 11704, 'epoch': 2} {'type': 'loss', 'content': 0.07207014411687851, 'timestamp': '2025-09-30 22:26:33.545843', 'step': 11705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.578632', 'step': 11705, 'epoch': 2} {'type': 'loss', 'content': 0.09501337260007858, 'timestamp': '2025-09-30 22:26:33.588472', 'step': 11706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.622102', 'step': 11706, 'epoch': 2} {'type': 'loss', 'content': 0.2099667638540268, 'timestamp': '2025-09-30 22:26:33.629848', 'step': 11707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:33.665851', 'step': 11707, 'epoch': 2} {'type': 'loss', 'content': 0.1159718930721283, 'timestamp': '2025-09-30 22:26:33.701068', 'step': 11708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.739243', 'step': 11708, 'epoch': 2} {'type': 'loss', 'content': 0.1824481338262558, 'timestamp': '2025-09-30 22:26:33.743888', 'step': 11709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:33.777273', 'step': 11709, 'epoch': 2} {'type': 'loss', 'content': 0.16285370290279388, 'timestamp': '2025-09-30 22:26:33.782439', 'step': 11710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:33.815636', 'step': 11710, 'epoch': 2} {'type': 'loss', 'content': 0.10246705263853073, 'timestamp': '2025-09-30 22:26:33.820486', 'step': 11711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:33.852621', 'step': 11711, 'epoch': 2} {'type': 'loss', 'content': 0.06450377404689789, 'timestamp': '2025-09-30 22:26:33.883099', 'step': 11712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:33.923958', 'step': 11712, 'epoch': 2} {'type': 'loss', 'content': 0.09431228041648865, 'timestamp': '2025-09-30 22:26:33.926974', 'step': 11713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:33.965857', 'step': 11713, 'epoch': 2} {'type': 'loss', 'content': 0.16210031509399414, 'timestamp': '2025-09-30 22:26:33.974929', 'step': 11714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:34.012280', 'step': 11714, 'epoch': 2} {'type': 'loss', 'content': 0.12857623398303986, 'timestamp': '2025-09-30 22:26:34.016237', 'step': 11715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:26:34.053013', 'step': 11715, 'epoch': 2} {'type': 'loss', 'content': 0.07420843094587326, 'timestamp': '2025-09-30 22:26:34.084116', 'step': 11716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:34.121409', 'step': 11716, 'epoch': 2} {'type': 'loss', 'content': 0.18018123507499695, 'timestamp': '2025-09-30 22:26:34.131828', 'step': 11717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.164332', 'step': 11717, 'epoch': 2} {'type': 'loss', 'content': 0.2101925015449524, 'timestamp': '2025-09-30 22:26:34.167925', 'step': 11718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:34.200691', 'step': 11718, 'epoch': 2} {'type': 'loss', 'content': 0.09071808308362961, 'timestamp': '2025-09-30 22:26:34.204826', 'step': 11719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:34.238310', 'step': 11719, 'epoch': 2} {'type': 'loss', 'content': 0.08456515520811081, 'timestamp': '2025-09-30 22:26:34.262763', 'step': 11720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:34.295376', 'step': 11720, 'epoch': 2} {'type': 'loss', 'content': 0.14404022693634033, 'timestamp': '2025-09-30 22:26:34.298638', 'step': 11721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.329546', 'step': 11721, 'epoch': 2} {'type': 'loss', 'content': 0.08719174563884735, 'timestamp': '2025-09-30 22:26:34.333300', 'step': 11722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:34.364482', 'step': 11722, 'epoch': 2} {'type': 'loss', 'content': 0.1224670559167862, 'timestamp': '2025-09-30 22:26:34.368232', 'step': 11723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.401161', 'step': 11723, 'epoch': 2} {'type': 'loss', 'content': 0.06066098064184189, 'timestamp': '2025-09-30 22:26:34.425823', 'step': 11724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.463340', 'step': 11724, 'epoch': 2} {'type': 'loss', 'content': 0.09197188168764114, 'timestamp': '2025-09-30 22:26:34.467571', 'step': 11725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:34.499545', 'step': 11725, 'epoch': 2} {'type': 'loss', 'content': 0.16728493571281433, 'timestamp': '2025-09-30 22:26:34.506784', 'step': 11726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:34.540602', 'step': 11726, 'epoch': 2} {'type': 'loss', 'content': 0.09192655235528946, 'timestamp': '2025-09-30 22:26:34.547747', 'step': 11727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:34.584419', 'step': 11727, 'epoch': 2} {'type': 'loss', 'content': 0.11999522894620895, 'timestamp': '2025-09-30 22:26:34.613939', 'step': 11728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:34.647923', 'step': 11728, 'epoch': 2} {'type': 'loss', 'content': 0.14534957706928253, 'timestamp': '2025-09-30 22:26:34.652897', 'step': 11729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.685885', 'step': 11729, 'epoch': 2} {'type': 'loss', 'content': 0.05755279213190079, 'timestamp': '2025-09-30 22:26:34.689908', 'step': 11730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:34.723051', 'step': 11730, 'epoch': 2} {'type': 'loss', 'content': 0.08713438361883163, 'timestamp': '2025-09-30 22:26:34.727796', 'step': 11731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.762148', 'step': 11731, 'epoch': 2} {'type': 'loss', 'content': 0.1413479596376419, 'timestamp': '2025-09-30 22:26:34.787366', 'step': 11732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:34.820265', 'step': 11732, 'epoch': 2} {'type': 'loss', 'content': 0.11963167786598206, 'timestamp': '2025-09-30 22:26:34.825535', 'step': 11733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.859452', 'step': 11733, 'epoch': 2} {'type': 'loss', 'content': 0.18710508942604065, 'timestamp': '2025-09-30 22:26:34.862176', 'step': 11734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:34.894724', 'step': 11734, 'epoch': 2} {'type': 'loss', 'content': 0.10028254985809326, 'timestamp': '2025-09-30 22:26:34.897442', 'step': 11735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:34.928544', 'step': 11735, 'epoch': 2} {'type': 'loss', 'content': 0.08017966151237488, 'timestamp': '2025-09-30 22:26:34.960673', 'step': 11736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:34.994155', 'step': 11736, 'epoch': 2} {'type': 'loss', 'content': 0.08359848707914352, 'timestamp': '2025-09-30 22:26:34.998128', 'step': 11737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:35.028778', 'step': 11737, 'epoch': 2} {'type': 'loss', 'content': 0.038206495344638824, 'timestamp': '2025-09-30 22:26:35.032086', 'step': 11738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:35.062410', 'step': 11738, 'epoch': 2} {'type': 'loss', 'content': 0.1168193370103836, 'timestamp': '2025-09-30 22:26:35.065918', 'step': 11739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:35.098636', 'step': 11739, 'epoch': 2} {'type': 'loss', 'content': 0.18027402460575104, 'timestamp': '2025-09-30 22:26:35.133801', 'step': 11740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.171336', 'step': 11740, 'epoch': 2} {'type': 'loss', 'content': 0.10205397754907608, 'timestamp': '2025-09-30 22:26:35.174769', 'step': 11741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:35.218283', 'step': 11741, 'epoch': 2} {'type': 'loss', 'content': 0.05128014460206032, 'timestamp': '2025-09-30 22:26:35.220766', 'step': 11742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:35.253165', 'step': 11742, 'epoch': 2} {'type': 'loss', 'content': 0.10846797376871109, 'timestamp': '2025-09-30 22:26:35.256748', 'step': 11743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:35.300271', 'step': 11743, 'epoch': 2} {'type': 'loss', 'content': 0.14642156660556793, 'timestamp': '2025-09-30 22:26:35.325375', 'step': 11744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.356082', 'step': 11744, 'epoch': 2} {'type': 'loss', 'content': 0.09857556223869324, 'timestamp': '2025-09-30 22:26:35.359497', 'step': 11745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.395916', 'step': 11745, 'epoch': 2} {'type': 'loss', 'content': 0.11305396258831024, 'timestamp': '2025-09-30 22:26:35.403943', 'step': 11746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:35.439751', 'step': 11746, 'epoch': 2} {'type': 'loss', 'content': 0.162567138671875, 'timestamp': '2025-09-30 22:26:35.449116', 'step': 11747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.485548', 'step': 11747, 'epoch': 2} {'type': 'loss', 'content': 0.08830250054597855, 'timestamp': '2025-09-30 22:26:35.510106', 'step': 11748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:35.543088', 'step': 11748, 'epoch': 2} {'type': 'loss', 'content': 0.09170856326818466, 'timestamp': '2025-09-30 22:26:35.547702', 'step': 11749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.578163', 'step': 11749, 'epoch': 2} {'type': 'loss', 'content': 0.20948010683059692, 'timestamp': '2025-09-30 22:26:35.580901', 'step': 11750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:35.613987', 'step': 11750, 'epoch': 2} {'type': 'loss', 'content': 0.09952587634325027, 'timestamp': '2025-09-30 22:26:35.617307', 'step': 11751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.650165', 'step': 11751, 'epoch': 2} {'type': 'loss', 'content': 0.07186926156282425, 'timestamp': '2025-09-30 22:26:35.675330', 'step': 11752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:35.709440', 'step': 11752, 'epoch': 2} {'type': 'loss', 'content': 0.2430926114320755, 'timestamp': '2025-09-30 22:26:35.712124', 'step': 11753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:35.751266', 'step': 11753, 'epoch': 2} {'type': 'loss', 'content': 0.08093953132629395, 'timestamp': '2025-09-30 22:26:35.754271', 'step': 11754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:35.784858', 'step': 11754, 'epoch': 2} {'type': 'loss', 'content': 0.1017104759812355, 'timestamp': '2025-09-30 22:26:35.787795', 'step': 11755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:35.819797', 'step': 11755, 'epoch': 2} {'type': 'loss', 'content': 0.1407671719789505, 'timestamp': '2025-09-30 22:26:35.843651', 'step': 11756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.874341', 'step': 11756, 'epoch': 2} {'type': 'loss', 'content': 0.17220807075500488, 'timestamp': '2025-09-30 22:26:35.877526', 'step': 11757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:35.913157', 'step': 11757, 'epoch': 2} {'type': 'loss', 'content': 0.10046955943107605, 'timestamp': '2025-09-30 22:26:35.917906', 'step': 11758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.956356', 'step': 11758, 'epoch': 2} {'type': 'loss', 'content': 0.20377862453460693, 'timestamp': '2025-09-30 22:26:35.960395', 'step': 11759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:35.998035', 'step': 11759, 'epoch': 2} {'type': 'loss', 'content': 0.16832171380519867, 'timestamp': '2025-09-30 22:26:36.022633', 'step': 11760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:26:36.052922', 'step': 11760, 'epoch': 2} {'type': 'loss', 'content': 0.1698455661535263, 'timestamp': '2025-09-30 22:26:36.055792', 'step': 11761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:36.088078', 'step': 11761, 'epoch': 2} {'type': 'loss', 'content': 0.07298722118139267, 'timestamp': '2025-09-30 22:26:36.091483', 'step': 11762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.123324', 'step': 11762, 'epoch': 2} {'type': 'loss', 'content': 0.11929734796285629, 'timestamp': '2025-09-30 22:26:36.127352', 'step': 11763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.157390', 'step': 11763, 'epoch': 2} {'type': 'loss', 'content': 0.09673571586608887, 'timestamp': '2025-09-30 22:26:36.182392', 'step': 11764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.228038', 'step': 11764, 'epoch': 2} {'type': 'loss', 'content': 0.11751283705234528, 'timestamp': '2025-09-30 22:26:36.237957', 'step': 11765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:36.274444', 'step': 11765, 'epoch': 2} {'type': 'loss', 'content': 0.059108708053827286, 'timestamp': '2025-09-30 22:26:36.284235', 'step': 11766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:36.322331', 'step': 11766, 'epoch': 2} {'type': 'loss', 'content': 0.090317003428936, 'timestamp': '2025-09-30 22:26:36.325367', 'step': 11767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:36.356071', 'step': 11767, 'epoch': 2} {'type': 'loss', 'content': 0.15558001399040222, 'timestamp': '2025-09-30 22:26:36.381508', 'step': 11768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.413686', 'step': 11768, 'epoch': 2} {'type': 'loss', 'content': 0.12751545011997223, 'timestamp': '2025-09-30 22:26:36.416462', 'step': 11769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:36.449168', 'step': 11769, 'epoch': 2} {'type': 'loss', 'content': 0.14580410718917847, 'timestamp': '2025-09-30 22:26:36.452571', 'step': 11770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:36.490194', 'step': 11770, 'epoch': 2} {'type': 'loss', 'content': 0.02781352773308754, 'timestamp': '2025-09-30 22:26:36.494168', 'step': 11771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.527118', 'step': 11771, 'epoch': 2} {'type': 'loss', 'content': 0.12179592996835709, 'timestamp': '2025-09-30 22:26:36.553545', 'step': 11772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.586862', 'step': 11772, 'epoch': 2} {'type': 'loss', 'content': 0.06527459621429443, 'timestamp': '2025-09-30 22:26:36.596135', 'step': 11773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.629396', 'step': 11773, 'epoch': 2} {'type': 'loss', 'content': 0.14513106644153595, 'timestamp': '2025-09-30 22:26:36.633004', 'step': 11774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:36.674345', 'step': 11774, 'epoch': 2} {'type': 'loss', 'content': 0.14441268146038055, 'timestamp': '2025-09-30 22:26:36.678174', 'step': 11775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:36.708388', 'step': 11775, 'epoch': 2} {'type': 'loss', 'content': 0.0759444460272789, 'timestamp': '2025-09-30 22:26:36.732946', 'step': 11776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:36.766159', 'step': 11776, 'epoch': 2} {'type': 'loss', 'content': 0.22029919922351837, 'timestamp': '2025-09-30 22:26:36.769581', 'step': 11777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:36.801524', 'step': 11777, 'epoch': 2} {'type': 'loss', 'content': 0.057806648313999176, 'timestamp': '2025-09-30 22:26:36.806339', 'step': 11778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:36.852658', 'step': 11778, 'epoch': 2} {'type': 'loss', 'content': 0.1050543338060379, 'timestamp': '2025-09-30 22:26:36.855925', 'step': 11779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:36.887564', 'step': 11779, 'epoch': 2} {'type': 'loss', 'content': 0.1338188648223877, 'timestamp': '2025-09-30 22:26:36.913491', 'step': 11780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:36.957937', 'step': 11780, 'epoch': 2} {'type': 'loss', 'content': 0.09864244610071182, 'timestamp': '2025-09-30 22:26:36.961441', 'step': 11781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:36.992833', 'step': 11781, 'epoch': 2} {'type': 'loss', 'content': 0.10382993519306183, 'timestamp': '2025-09-30 22:26:36.997030', 'step': 11782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.030849', 'step': 11782, 'epoch': 2} {'type': 'loss', 'content': 0.14399605989456177, 'timestamp': '2025-09-30 22:26:37.039682', 'step': 11783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:37.074037', 'step': 11783, 'epoch': 2} {'type': 'loss', 'content': 0.13216017186641693, 'timestamp': '2025-09-30 22:26:37.099583', 'step': 11784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:37.131636', 'step': 11784, 'epoch': 2} {'type': 'loss', 'content': 0.09084391593933105, 'timestamp': '2025-09-30 22:26:37.144301', 'step': 11785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.182840', 'step': 11785, 'epoch': 2} {'type': 'loss', 'content': 0.10264982283115387, 'timestamp': '2025-09-30 22:26:37.194556', 'step': 11786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:37.229520', 'step': 11786, 'epoch': 2} {'type': 'loss', 'content': 0.13443581759929657, 'timestamp': '2025-09-30 22:26:37.235542', 'step': 11787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.267946', 'step': 11787, 'epoch': 2} {'type': 'loss', 'content': 0.07957125455141068, 'timestamp': '2025-09-30 22:26:37.293577', 'step': 11788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.331440', 'step': 11788, 'epoch': 2} {'type': 'loss', 'content': 0.10618477314710617, 'timestamp': '2025-09-30 22:26:37.342511', 'step': 11789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:37.385247', 'step': 11789, 'epoch': 2} {'type': 'loss', 'content': 0.1761716902256012, 'timestamp': '2025-09-30 22:26:37.389862', 'step': 11790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:37.426593', 'step': 11790, 'epoch': 2} {'type': 'loss', 'content': 0.13383524119853973, 'timestamp': '2025-09-30 22:26:37.431919', 'step': 11791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.465636', 'step': 11791, 'epoch': 2} {'type': 'loss', 'content': 0.09343788772821426, 'timestamp': '2025-09-30 22:26:37.492803', 'step': 11792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.525483', 'step': 11792, 'epoch': 2} {'type': 'loss', 'content': 0.11204081773757935, 'timestamp': '2025-09-30 22:26:37.543037', 'step': 11793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:37.580272', 'step': 11793, 'epoch': 2} {'type': 'loss', 'content': 0.0605015754699707, 'timestamp': '2025-09-30 22:26:37.586132', 'step': 11794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.624343', 'step': 11794, 'epoch': 2} {'type': 'loss', 'content': 0.10079306364059448, 'timestamp': '2025-09-30 22:26:37.627974', 'step': 11795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:37.660947', 'step': 11795, 'epoch': 2} {'type': 'loss', 'content': 0.15298348665237427, 'timestamp': '2025-09-30 22:26:37.691387', 'step': 11796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:37.724941', 'step': 11796, 'epoch': 2} {'type': 'loss', 'content': 0.0800948515534401, 'timestamp': '2025-09-30 22:26:37.729776', 'step': 11797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.768499', 'step': 11797, 'epoch': 2} {'type': 'loss', 'content': 0.08010780066251755, 'timestamp': '2025-09-30 22:26:37.773020', 'step': 11798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.806496', 'step': 11798, 'epoch': 2} {'type': 'loss', 'content': 0.13039931654930115, 'timestamp': '2025-09-30 22:26:37.811238', 'step': 11799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.842804', 'step': 11799, 'epoch': 2} {'type': 'loss', 'content': 0.0750785544514656, 'timestamp': '2025-09-30 22:26:37.875385', 'step': 11800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:37.907444', 'step': 11800, 'epoch': 2} {'type': 'loss', 'content': 0.11251652985811234, 'timestamp': '2025-09-30 22:26:37.913272', 'step': 11801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:37.946570', 'step': 11801, 'epoch': 2} {'type': 'loss', 'content': 0.09744596481323242, 'timestamp': '2025-09-30 22:26:37.952589', 'step': 11802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:37.986265', 'step': 11802, 'epoch': 2} {'type': 'loss', 'content': 0.10495176166296005, 'timestamp': '2025-09-30 22:26:37.993709', 'step': 11803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.025840', 'step': 11803, 'epoch': 2} {'type': 'loss', 'content': 0.1486678421497345, 'timestamp': '2025-09-30 22:26:38.058541', 'step': 11804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:38.088992', 'step': 11804, 'epoch': 2} {'type': 'loss', 'content': 0.06828618794679642, 'timestamp': '2025-09-30 22:26:38.092183', 'step': 11805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.123329', 'step': 11805, 'epoch': 2} {'type': 'loss', 'content': 0.1572747379541397, 'timestamp': '2025-09-30 22:26:38.139175', 'step': 11806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:38.178822', 'step': 11806, 'epoch': 2} {'type': 'loss', 'content': 0.17569498717784882, 'timestamp': '2025-09-30 22:26:38.181891', 'step': 11807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.217179', 'step': 11807, 'epoch': 2} {'type': 'loss', 'content': 0.1527862548828125, 'timestamp': '2025-09-30 22:26:38.246223', 'step': 11808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:38.283425', 'step': 11808, 'epoch': 2} {'type': 'loss', 'content': 0.10507769882678986, 'timestamp': '2025-09-30 22:26:38.285876', 'step': 11809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:38.319056', 'step': 11809, 'epoch': 2} {'type': 'loss', 'content': 0.153472900390625, 'timestamp': '2025-09-30 22:26:38.327139', 'step': 11810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:38.363179', 'step': 11810, 'epoch': 2} {'type': 'loss', 'content': 0.08860137313604355, 'timestamp': '2025-09-30 22:26:38.384111', 'step': 11811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.425698', 'step': 11811, 'epoch': 2} {'type': 'loss', 'content': 0.11828821897506714, 'timestamp': '2025-09-30 22:26:38.458820', 'step': 11812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.503241', 'step': 11812, 'epoch': 2} {'type': 'loss', 'content': 0.0995827466249466, 'timestamp': '2025-09-30 22:26:38.507461', 'step': 11813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:38.548754', 'step': 11813, 'epoch': 2} {'type': 'loss', 'content': 0.10554096847772598, 'timestamp': '2025-09-30 22:26:38.555311', 'step': 11814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.592434', 'step': 11814, 'epoch': 2} {'type': 'loss', 'content': 0.16593170166015625, 'timestamp': '2025-09-30 22:26:38.605334', 'step': 11815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:38.636958', 'step': 11815, 'epoch': 2} {'type': 'loss', 'content': 0.041084207594394684, 'timestamp': '2025-09-30 22:26:38.675006', 'step': 11816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:38.713462', 'step': 11816, 'epoch': 2} {'type': 'loss', 'content': 0.15578624606132507, 'timestamp': '2025-09-30 22:26:38.718808', 'step': 11817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:38.752012', 'step': 11817, 'epoch': 2} {'type': 'loss', 'content': 0.1724211424589157, 'timestamp': '2025-09-30 22:26:38.763791', 'step': 11818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:38.800065', 'step': 11818, 'epoch': 2} {'type': 'loss', 'content': 0.1945313960313797, 'timestamp': '2025-09-30 22:26:38.805831', 'step': 11819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:38.840319', 'step': 11819, 'epoch': 2} {'type': 'loss', 'content': 0.034477584064006805, 'timestamp': '2025-09-30 22:26:38.868283', 'step': 11820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:38.898900', 'step': 11820, 'epoch': 2} {'type': 'loss', 'content': 0.030568113550543785, 'timestamp': '2025-09-30 22:26:38.902130', 'step': 11821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:38.932201', 'step': 11821, 'epoch': 2} {'type': 'loss', 'content': 0.143051877617836, 'timestamp': '2025-09-30 22:26:38.934663', 'step': 11822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:38.965528', 'step': 11822, 'epoch': 2} {'type': 'loss', 'content': 0.045946430414915085, 'timestamp': '2025-09-30 22:26:38.968347', 'step': 11823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:38.999459', 'step': 11823, 'epoch': 2} {'type': 'loss', 'content': 0.10548847913742065, 'timestamp': '2025-09-30 22:26:39.024895', 'step': 11824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:39.058742', 'step': 11824, 'epoch': 2} {'type': 'loss', 'content': 0.13475194573402405, 'timestamp': '2025-09-30 22:26:39.071682', 'step': 11825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.105342', 'step': 11825, 'epoch': 2} {'type': 'loss', 'content': 0.12206266075372696, 'timestamp': '2025-09-30 22:26:39.111835', 'step': 11826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.157819', 'step': 11826, 'epoch': 2} {'type': 'loss', 'content': 0.07830116897821426, 'timestamp': '2025-09-30 22:26:39.161412', 'step': 11827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:39.200280', 'step': 11827, 'epoch': 2} {'type': 'loss', 'content': 0.1877603381872177, 'timestamp': '2025-09-30 22:26:39.235521', 'step': 11828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.273890', 'step': 11828, 'epoch': 2} {'type': 'loss', 'content': 0.0983833521604538, 'timestamp': '2025-09-30 22:26:39.282885', 'step': 11829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:39.314171', 'step': 11829, 'epoch': 2} {'type': 'loss', 'content': 0.24225963652133942, 'timestamp': '2025-09-30 22:26:39.322147', 'step': 11830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.365988', 'step': 11830, 'epoch': 2} {'type': 'loss', 'content': 0.12241355329751968, 'timestamp': '2025-09-30 22:26:39.375836', 'step': 11831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:39.413552', 'step': 11831, 'epoch': 2} {'type': 'loss', 'content': 0.18580713868141174, 'timestamp': '2025-09-30 22:26:39.441196', 'step': 11832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.473027', 'step': 11832, 'epoch': 2} {'type': 'loss', 'content': 0.10734016448259354, 'timestamp': '2025-09-30 22:26:39.479691', 'step': 11833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:39.513352', 'step': 11833, 'epoch': 2} {'type': 'loss', 'content': 0.2095121294260025, 'timestamp': '2025-09-30 22:26:39.519251', 'step': 11834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:39.568057', 'step': 11834, 'epoch': 2} {'type': 'loss', 'content': 0.16081316769123077, 'timestamp': '2025-09-30 22:26:39.571507', 'step': 11835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:39.608103', 'step': 11835, 'epoch': 2} {'type': 'loss', 'content': 0.10088030993938446, 'timestamp': '2025-09-30 22:26:39.635413', 'step': 11836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:39.667439', 'step': 11836, 'epoch': 2} {'type': 'loss', 'content': 0.04971322789788246, 'timestamp': '2025-09-30 22:26:39.682414', 'step': 11837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.714469', 'step': 11837, 'epoch': 2} {'type': 'loss', 'content': 0.08558319509029388, 'timestamp': '2025-09-30 22:26:39.725504', 'step': 11838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:39.757730', 'step': 11838, 'epoch': 2} {'type': 'loss', 'content': 0.12421994656324387, 'timestamp': '2025-09-30 22:26:39.770254', 'step': 11839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:39.815919', 'step': 11839, 'epoch': 2} {'type': 'loss', 'content': 0.10650498420000076, 'timestamp': '2025-09-30 22:26:39.840964', 'step': 11840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:39.873718', 'step': 11840, 'epoch': 2} {'type': 'loss', 'content': 0.15789204835891724, 'timestamp': '2025-09-30 22:26:39.879336', 'step': 11841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.914891', 'step': 11841, 'epoch': 2} {'type': 'loss', 'content': 0.15173131227493286, 'timestamp': '2025-09-30 22:26:39.920718', 'step': 11842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:39.953666', 'step': 11842, 'epoch': 2} {'type': 'loss', 'content': 0.09909515827894211, 'timestamp': '2025-09-30 22:26:39.963891', 'step': 11843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.004197', 'step': 11843, 'epoch': 2} {'type': 'loss', 'content': 0.04331890866160393, 'timestamp': '2025-09-30 22:26:40.029916', 'step': 11844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.072791', 'step': 11844, 'epoch': 2} {'type': 'loss', 'content': 0.12759524583816528, 'timestamp': '2025-09-30 22:26:40.076971', 'step': 11845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.120266', 'step': 11845, 'epoch': 2} {'type': 'loss', 'content': 0.1411266177892685, 'timestamp': '2025-09-30 22:26:40.126141', 'step': 11846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.158735', 'step': 11846, 'epoch': 2} {'type': 'loss', 'content': 0.15103761851787567, 'timestamp': '2025-09-30 22:26:40.161823', 'step': 11847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.194060', 'step': 11847, 'epoch': 2} {'type': 'loss', 'content': 0.02566620334982872, 'timestamp': '2025-09-30 22:26:40.219168', 'step': 11848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.250498', 'step': 11848, 'epoch': 2} {'type': 'loss', 'content': 0.13030238449573517, 'timestamp': '2025-09-30 22:26:40.253897', 'step': 11849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.285612', 'step': 11849, 'epoch': 2} {'type': 'loss', 'content': 0.05580737441778183, 'timestamp': '2025-09-30 22:26:40.293634', 'step': 11850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:40.330609', 'step': 11850, 'epoch': 2} {'type': 'loss', 'content': 0.08840031921863556, 'timestamp': '2025-09-30 22:26:40.333718', 'step': 11851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:40.364166', 'step': 11851, 'epoch': 2} {'type': 'loss', 'content': 0.12545625865459442, 'timestamp': '2025-09-30 22:26:40.389348', 'step': 11852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.426724', 'step': 11852, 'epoch': 2} {'type': 'loss', 'content': 0.04120996966958046, 'timestamp': '2025-09-30 22:26:40.433375', 'step': 11853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.469587', 'step': 11853, 'epoch': 2} {'type': 'loss', 'content': 0.15724706649780273, 'timestamp': '2025-09-30 22:26:40.472333', 'step': 11854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.502894', 'step': 11854, 'epoch': 2} {'type': 'loss', 'content': 0.13837093114852905, 'timestamp': '2025-09-30 22:26:40.509194', 'step': 11855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.544495', 'step': 11855, 'epoch': 2} {'type': 'loss', 'content': 0.19437971711158752, 'timestamp': '2025-09-30 22:26:40.568599', 'step': 11856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:40.600921', 'step': 11856, 'epoch': 2} {'type': 'loss', 'content': 0.11961664259433746, 'timestamp': '2025-09-30 22:26:40.605894', 'step': 11857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.639083', 'step': 11857, 'epoch': 2} {'type': 'loss', 'content': 0.1265960931777954, 'timestamp': '2025-09-30 22:26:40.644865', 'step': 11858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.676150', 'step': 11858, 'epoch': 2} {'type': 'loss', 'content': 0.09062075614929199, 'timestamp': '2025-09-30 22:26:40.678985', 'step': 11859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.714558', 'step': 11859, 'epoch': 2} {'type': 'loss', 'content': 0.15193244814872742, 'timestamp': '2025-09-30 22:26:40.738764', 'step': 11860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.770688', 'step': 11860, 'epoch': 2} {'type': 'loss', 'content': 0.11254409700632095, 'timestamp': '2025-09-30 22:26:40.773584', 'step': 11861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:40.813477', 'step': 11861, 'epoch': 2} {'type': 'loss', 'content': 0.19740425050258636, 'timestamp': '2025-09-30 22:26:40.816311', 'step': 11862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:40.847039', 'step': 11862, 'epoch': 2} {'type': 'loss', 'content': 0.13132116198539734, 'timestamp': '2025-09-30 22:26:40.849779', 'step': 11863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:40.881846', 'step': 11863, 'epoch': 2} {'type': 'loss', 'content': 0.07642989605665207, 'timestamp': '2025-09-30 22:26:40.906647', 'step': 11864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:40.938548', 'step': 11864, 'epoch': 2} {'type': 'loss', 'content': 0.04439913108944893, 'timestamp': '2025-09-30 22:26:40.942754', 'step': 11865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:40.973456', 'step': 11865, 'epoch': 2} {'type': 'loss', 'content': 0.12622617185115814, 'timestamp': '2025-09-30 22:26:40.975945', 'step': 11866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.006266', 'step': 11866, 'epoch': 2} {'type': 'loss', 'content': 0.06680096685886383, 'timestamp': '2025-09-30 22:26:41.008424', 'step': 11867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:41.040945', 'step': 11867, 'epoch': 2} {'type': 'loss', 'content': 0.12668050825595856, 'timestamp': '2025-09-30 22:26:41.064845', 'step': 11868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:41.094948', 'step': 11868, 'epoch': 2} {'type': 'loss', 'content': 0.06188088282942772, 'timestamp': '2025-09-30 22:26:41.097605', 'step': 11869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:41.127796', 'step': 11869, 'epoch': 2} {'type': 'loss', 'content': 0.08876262605190277, 'timestamp': '2025-09-30 22:26:41.132452', 'step': 11870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:41.167324', 'step': 11870, 'epoch': 2} {'type': 'loss', 'content': 0.15440112352371216, 'timestamp': '2025-09-30 22:26:41.172641', 'step': 11871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:41.204061', 'step': 11871, 'epoch': 2} {'type': 'loss', 'content': 0.08913788199424744, 'timestamp': '2025-09-30 22:26:41.236100', 'step': 11872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:41.267876', 'step': 11872, 'epoch': 2} {'type': 'loss', 'content': 0.12434983998537064, 'timestamp': '2025-09-30 22:26:41.272350', 'step': 11873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.307265', 'step': 11873, 'epoch': 2} {'type': 'loss', 'content': 0.13031190633773804, 'timestamp': '2025-09-30 22:26:41.311599', 'step': 11874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:41.344749', 'step': 11874, 'epoch': 2} {'type': 'loss', 'content': 0.04204241558909416, 'timestamp': '2025-09-30 22:26:41.347382', 'step': 11875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.379258', 'step': 11875, 'epoch': 2} {'type': 'loss', 'content': 0.10232866555452347, 'timestamp': '2025-09-30 22:26:41.405872', 'step': 11876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.436728', 'step': 11876, 'epoch': 2} {'type': 'loss', 'content': 0.08762811124324799, 'timestamp': '2025-09-30 22:26:41.439694', 'step': 11877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.470333', 'step': 11877, 'epoch': 2} {'type': 'loss', 'content': 0.14080986380577087, 'timestamp': '2025-09-30 22:26:41.472949', 'step': 11878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:41.502610', 'step': 11878, 'epoch': 2} {'type': 'loss', 'content': 0.11764340102672577, 'timestamp': '2025-09-30 22:26:41.505383', 'step': 11879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.536561', 'step': 11879, 'epoch': 2} {'type': 'loss', 'content': 0.1472863405942917, 'timestamp': '2025-09-30 22:26:41.560554', 'step': 11880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.594945', 'step': 11880, 'epoch': 2} {'type': 'loss', 'content': 0.10633979737758636, 'timestamp': '2025-09-30 22:26:41.598418', 'step': 11881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:41.632708', 'step': 11881, 'epoch': 2} {'type': 'loss', 'content': 0.11588919907808304, 'timestamp': '2025-09-30 22:26:41.638615', 'step': 11882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.672670', 'step': 11882, 'epoch': 2} {'type': 'loss', 'content': 0.18263758718967438, 'timestamp': '2025-09-30 22:26:41.680851', 'step': 11883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:41.716289', 'step': 11883, 'epoch': 2} {'type': 'loss', 'content': 0.10931675136089325, 'timestamp': '2025-09-30 22:26:41.744421', 'step': 11884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:41.777398', 'step': 11884, 'epoch': 2} {'type': 'loss', 'content': 0.10743674635887146, 'timestamp': '2025-09-30 22:26:41.780029', 'step': 11885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.810685', 'step': 11885, 'epoch': 2} {'type': 'loss', 'content': 0.16251671314239502, 'timestamp': '2025-09-30 22:26:41.813838', 'step': 11886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:41.844086', 'step': 11886, 'epoch': 2} {'type': 'loss', 'content': 0.09467615187168121, 'timestamp': '2025-09-30 22:26:41.847306', 'step': 11887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:41.878108', 'step': 11887, 'epoch': 2} {'type': 'loss', 'content': 0.05674492567777634, 'timestamp': '2025-09-30 22:26:41.914649', 'step': 11888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:41.947192', 'step': 11888, 'epoch': 2} {'type': 'loss', 'content': 0.17031800746917725, 'timestamp': '2025-09-30 22:26:41.949948', 'step': 11889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:41.981727', 'step': 11889, 'epoch': 2} {'type': 'loss', 'content': 0.1104850172996521, 'timestamp': '2025-09-30 22:26:41.984938', 'step': 11890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:42.014922', 'step': 11890, 'epoch': 2} {'type': 'loss', 'content': 0.08945495635271072, 'timestamp': '2025-09-30 22:26:42.018710', 'step': 11891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.054045', 'step': 11891, 'epoch': 2} {'type': 'loss', 'content': 0.16361069679260254, 'timestamp': '2025-09-30 22:26:42.080453', 'step': 11892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.112583', 'step': 11892, 'epoch': 2} {'type': 'loss', 'content': 0.10224051028490067, 'timestamp': '2025-09-30 22:26:42.115815', 'step': 11893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.146385', 'step': 11893, 'epoch': 2} {'type': 'loss', 'content': 0.11481739580631256, 'timestamp': '2025-09-30 22:26:42.149300', 'step': 11894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.180868', 'step': 11894, 'epoch': 2} {'type': 'loss', 'content': 0.24349309504032135, 'timestamp': '2025-09-30 22:26:42.183788', 'step': 11895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.215024', 'step': 11895, 'epoch': 2} {'type': 'loss', 'content': 0.04273331165313721, 'timestamp': '2025-09-30 22:26:42.245643', 'step': 11896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.276789', 'step': 11896, 'epoch': 2} {'type': 'loss', 'content': 0.16209326684474945, 'timestamp': '2025-09-30 22:26:42.281559', 'step': 11897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.313612', 'step': 11897, 'epoch': 2} {'type': 'loss', 'content': 0.1130748987197876, 'timestamp': '2025-09-30 22:26:42.316120', 'step': 11898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:42.346731', 'step': 11898, 'epoch': 2} {'type': 'loss', 'content': 0.11239538341760635, 'timestamp': '2025-09-30 22:26:42.349877', 'step': 11899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.382713', 'step': 11899, 'epoch': 2} {'type': 'loss', 'content': 0.09027796238660812, 'timestamp': '2025-09-30 22:26:42.408434', 'step': 11900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:42.443165', 'step': 11900, 'epoch': 2} {'type': 'loss', 'content': 0.1990450620651245, 'timestamp': '2025-09-30 22:26:42.446233', 'step': 11901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.476760', 'step': 11901, 'epoch': 2} {'type': 'loss', 'content': 0.12520307302474976, 'timestamp': '2025-09-30 22:26:42.479732', 'step': 11902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.510355', 'step': 11902, 'epoch': 2} {'type': 'loss', 'content': 0.11457580327987671, 'timestamp': '2025-09-30 22:26:42.515731', 'step': 11903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.553544', 'step': 11903, 'epoch': 2} {'type': 'loss', 'content': 0.15003825724124908, 'timestamp': '2025-09-30 22:26:42.579858', 'step': 11904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:42.613103', 'step': 11904, 'epoch': 2} {'type': 'loss', 'content': 0.1339859962463379, 'timestamp': '2025-09-30 22:26:42.619736', 'step': 11905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.651326', 'step': 11905, 'epoch': 2} {'type': 'loss', 'content': 0.07839003205299377, 'timestamp': '2025-09-30 22:26:42.654773', 'step': 11906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.686673', 'step': 11906, 'epoch': 2} {'type': 'loss', 'content': 0.06338692456483841, 'timestamp': '2025-09-30 22:26:42.690694', 'step': 11907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.723657', 'step': 11907, 'epoch': 2} {'type': 'loss', 'content': 0.1277378648519516, 'timestamp': '2025-09-30 22:26:42.748985', 'step': 11908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:42.779305', 'step': 11908, 'epoch': 2} {'type': 'loss', 'content': 0.06585454195737839, 'timestamp': '2025-09-30 22:26:42.785354', 'step': 11909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:42.819568', 'step': 11909, 'epoch': 2} {'type': 'loss', 'content': 0.2082442343235016, 'timestamp': '2025-09-30 22:26:42.822845', 'step': 11910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:42.854564', 'step': 11910, 'epoch': 2} {'type': 'loss', 'content': 0.12982770800590515, 'timestamp': '2025-09-30 22:26:42.859906', 'step': 11911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:42.898164', 'step': 11911, 'epoch': 2} {'type': 'loss', 'content': 0.09485290199518204, 'timestamp': '2025-09-30 22:26:42.932356', 'step': 11912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:42.964666', 'step': 11912, 'epoch': 2} {'type': 'loss', 'content': 0.09943358600139618, 'timestamp': '2025-09-30 22:26:42.973618', 'step': 11913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.005591', 'step': 11913, 'epoch': 2} {'type': 'loss', 'content': 0.13287129998207092, 'timestamp': '2025-09-30 22:26:43.013175', 'step': 11914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:43.050941', 'step': 11914, 'epoch': 2} {'type': 'loss', 'content': 0.22800497710704803, 'timestamp': '2025-09-30 22:26:43.054293', 'step': 11915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.085538', 'step': 11915, 'epoch': 2} {'type': 'loss', 'content': 0.0758042261004448, 'timestamp': '2025-09-30 22:26:43.110286', 'step': 11916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.143860', 'step': 11916, 'epoch': 2} {'type': 'loss', 'content': 0.0550621896982193, 'timestamp': '2025-09-30 22:26:43.147677', 'step': 11917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:43.182280', 'step': 11917, 'epoch': 2} {'type': 'loss', 'content': 0.08617275953292847, 'timestamp': '2025-09-30 22:26:43.188638', 'step': 11918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:43.222748', 'step': 11918, 'epoch': 2} {'type': 'loss', 'content': 0.13132739067077637, 'timestamp': '2025-09-30 22:26:43.227176', 'step': 11919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.258586', 'step': 11919, 'epoch': 2} {'type': 'loss', 'content': 0.13180598616600037, 'timestamp': '2025-09-30 22:26:43.286107', 'step': 11920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.317704', 'step': 11920, 'epoch': 2} {'type': 'loss', 'content': 0.11272456496953964, 'timestamp': '2025-09-30 22:26:43.321066', 'step': 11921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.353328', 'step': 11921, 'epoch': 2} {'type': 'loss', 'content': 0.07809828221797943, 'timestamp': '2025-09-30 22:26:43.360952', 'step': 11922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.398607', 'step': 11922, 'epoch': 2} {'type': 'loss', 'content': 0.136474147439003, 'timestamp': '2025-09-30 22:26:43.401143', 'step': 11923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.431822', 'step': 11923, 'epoch': 2} {'type': 'loss', 'content': 0.14196212589740753, 'timestamp': '2025-09-30 22:26:43.462451', 'step': 11924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.493780', 'step': 11924, 'epoch': 2} {'type': 'loss', 'content': 0.06623323261737823, 'timestamp': '2025-09-30 22:26:43.498659', 'step': 11925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.540388', 'step': 11925, 'epoch': 2} {'type': 'loss', 'content': 0.12396273761987686, 'timestamp': '2025-09-30 22:26:43.543795', 'step': 11926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:43.575004', 'step': 11926, 'epoch': 2} {'type': 'loss', 'content': 0.11728091537952423, 'timestamp': '2025-09-30 22:26:43.580117', 'step': 11927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:26:43.612883', 'step': 11927, 'epoch': 2} {'type': 'loss', 'content': 0.09804025292396545, 'timestamp': '2025-09-30 22:26:43.640986', 'step': 11928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:43.673959', 'step': 11928, 'epoch': 2} {'type': 'loss', 'content': 0.038207583129405975, 'timestamp': '2025-09-30 22:26:43.678807', 'step': 11929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:43.712437', 'step': 11929, 'epoch': 2} {'type': 'loss', 'content': 0.09405199438333511, 'timestamp': '2025-09-30 22:26:43.715533', 'step': 11930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:43.749197', 'step': 11930, 'epoch': 2} {'type': 'loss', 'content': 0.14461441338062286, 'timestamp': '2025-09-30 22:26:43.752457', 'step': 11931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:43.788176', 'step': 11931, 'epoch': 2} {'type': 'loss', 'content': 0.09545959532260895, 'timestamp': '2025-09-30 22:26:43.812734', 'step': 11932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:43.843559', 'step': 11932, 'epoch': 2} {'type': 'loss', 'content': 0.20235827565193176, 'timestamp': '2025-09-30 22:26:43.850409', 'step': 11933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:43.883655', 'step': 11933, 'epoch': 2} {'type': 'loss', 'content': 0.06956788152456284, 'timestamp': '2025-09-30 22:26:43.889350', 'step': 11934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:43.922718', 'step': 11934, 'epoch': 2} {'type': 'loss', 'content': 0.14023464918136597, 'timestamp': '2025-09-30 22:26:43.925240', 'step': 11935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:43.956344', 'step': 11935, 'epoch': 2} {'type': 'loss', 'content': 0.07554502040147781, 'timestamp': '2025-09-30 22:26:43.984475', 'step': 11936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.014008', 'step': 11936, 'epoch': 2} {'type': 'loss', 'content': 0.08921901136636734, 'timestamp': '2025-09-30 22:26:44.017233', 'step': 11937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.051547', 'step': 11937, 'epoch': 2} {'type': 'loss', 'content': 0.06820890307426453, 'timestamp': '2025-09-30 22:26:44.054516', 'step': 11938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.086307', 'step': 11938, 'epoch': 2} {'type': 'loss', 'content': 0.06008119136095047, 'timestamp': '2025-09-30 22:26:44.089537', 'step': 11939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:44.124337', 'step': 11939, 'epoch': 2} {'type': 'loss', 'content': 0.1475643515586853, 'timestamp': '2025-09-30 22:26:44.150843', 'step': 11940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.183417', 'step': 11940, 'epoch': 2} {'type': 'loss', 'content': 0.12979434430599213, 'timestamp': '2025-09-30 22:26:44.186548', 'step': 11941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.217953', 'step': 11941, 'epoch': 2} {'type': 'loss', 'content': 0.07045450806617737, 'timestamp': '2025-09-30 22:26:44.220667', 'step': 11942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.250933', 'step': 11942, 'epoch': 2} {'type': 'loss', 'content': 0.06768301874399185, 'timestamp': '2025-09-30 22:26:44.261623', 'step': 11943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.303256', 'step': 11943, 'epoch': 2} {'type': 'loss', 'content': 0.11756404489278793, 'timestamp': '2025-09-30 22:26:44.328833', 'step': 11944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.359847', 'step': 11944, 'epoch': 2} {'type': 'loss', 'content': 0.029782529920339584, 'timestamp': '2025-09-30 22:26:44.362344', 'step': 11945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.393740', 'step': 11945, 'epoch': 2} {'type': 'loss', 'content': 0.14916197955608368, 'timestamp': '2025-09-30 22:26:44.401380', 'step': 11946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:44.440583', 'step': 11946, 'epoch': 2} {'type': 'loss', 'content': 0.13339704275131226, 'timestamp': '2025-09-30 22:26:44.444158', 'step': 11947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:44.475232', 'step': 11947, 'epoch': 2} {'type': 'loss', 'content': 0.06257961690425873, 'timestamp': '2025-09-30 22:26:44.500627', 'step': 11948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.532141', 'step': 11948, 'epoch': 2} {'type': 'loss', 'content': 0.06812439113855362, 'timestamp': '2025-09-30 22:26:44.535976', 'step': 11949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:44.569493', 'step': 11949, 'epoch': 2} {'type': 'loss', 'content': 0.09288796782493591, 'timestamp': '2025-09-30 22:26:44.573964', 'step': 11950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.604848', 'step': 11950, 'epoch': 2} {'type': 'loss', 'content': 0.09771384298801422, 'timestamp': '2025-09-30 22:26:44.607395', 'step': 11951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.639743', 'step': 11951, 'epoch': 2} {'type': 'loss', 'content': 0.051409732550382614, 'timestamp': '2025-09-30 22:26:44.664717', 'step': 11952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.696324', 'step': 11952, 'epoch': 2} {'type': 'loss', 'content': 0.11208843439817429, 'timestamp': '2025-09-30 22:26:44.699902', 'step': 11953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:44.732122', 'step': 11953, 'epoch': 2} {'type': 'loss', 'content': 0.08358344435691833, 'timestamp': '2025-09-30 22:26:44.742919', 'step': 11954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.790750', 'step': 11954, 'epoch': 2} {'type': 'loss', 'content': 0.066896453499794, 'timestamp': '2025-09-30 22:26:44.793409', 'step': 11955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.824612', 'step': 11955, 'epoch': 2} {'type': 'loss', 'content': 0.0900629311800003, 'timestamp': '2025-09-30 22:26:44.848963', 'step': 11956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:44.891280', 'step': 11956, 'epoch': 2} {'type': 'loss', 'content': 0.15990857779979706, 'timestamp': '2025-09-30 22:26:44.894198', 'step': 11957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:44.927627', 'step': 11957, 'epoch': 2} {'type': 'loss', 'content': 0.11833774298429489, 'timestamp': '2025-09-30 22:26:44.931019', 'step': 11958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:44.961888', 'step': 11958, 'epoch': 2} {'type': 'loss', 'content': 0.18485213816165924, 'timestamp': '2025-09-30 22:26:44.964915', 'step': 11959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:44.996673', 'step': 11959, 'epoch': 2} {'type': 'loss', 'content': 0.059565529227256775, 'timestamp': '2025-09-30 22:26:45.021412', 'step': 11960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.052178', 'step': 11960, 'epoch': 2} {'type': 'loss', 'content': 0.14365997910499573, 'timestamp': '2025-09-30 22:26:45.055471', 'step': 11961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:45.087932', 'step': 11961, 'epoch': 2} {'type': 'loss', 'content': 0.0848408117890358, 'timestamp': '2025-09-30 22:26:45.090895', 'step': 11962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.125858', 'step': 11962, 'epoch': 2} {'type': 'loss', 'content': 0.1901492476463318, 'timestamp': '2025-09-30 22:26:45.129344', 'step': 11963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.159800', 'step': 11963, 'epoch': 2} {'type': 'loss', 'content': 0.08048688620328903, 'timestamp': '2025-09-30 22:26:45.183885', 'step': 11964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.216721', 'step': 11964, 'epoch': 2} {'type': 'loss', 'content': 0.13316071033477783, 'timestamp': '2025-09-30 22:26:45.219703', 'step': 11965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.249936', 'step': 11965, 'epoch': 2} {'type': 'loss', 'content': 0.09049161523580551, 'timestamp': '2025-09-30 22:26:45.253158', 'step': 11966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.284983', 'step': 11966, 'epoch': 2} {'type': 'loss', 'content': 0.14755576848983765, 'timestamp': '2025-09-30 22:26:45.288137', 'step': 11967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.319234', 'step': 11967, 'epoch': 2} {'type': 'loss', 'content': 0.14756819605827332, 'timestamp': '2025-09-30 22:26:45.347456', 'step': 11968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:45.377099', 'step': 11968, 'epoch': 2} {'type': 'loss', 'content': 0.08463254570960999, 'timestamp': '2025-09-30 22:26:45.382681', 'step': 11969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.413354', 'step': 11969, 'epoch': 2} {'type': 'loss', 'content': 0.14067129790782928, 'timestamp': '2025-09-30 22:26:45.417038', 'step': 11970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:45.447653', 'step': 11970, 'epoch': 2} {'type': 'loss', 'content': 0.10197864472866058, 'timestamp': '2025-09-30 22:26:45.450041', 'step': 11971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:45.482786', 'step': 11971, 'epoch': 2} {'type': 'loss', 'content': 0.1376957893371582, 'timestamp': '2025-09-30 22:26:45.517858', 'step': 11972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.558307', 'step': 11972, 'epoch': 2} {'type': 'loss', 'content': 0.13725118339061737, 'timestamp': '2025-09-30 22:26:45.563422', 'step': 11973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.597070', 'step': 11973, 'epoch': 2} {'type': 'loss', 'content': 0.14718255400657654, 'timestamp': '2025-09-30 22:26:45.601165', 'step': 11974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.632757', 'step': 11974, 'epoch': 2} {'type': 'loss', 'content': 0.13259808719158173, 'timestamp': '2025-09-30 22:26:45.636651', 'step': 11975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.669849', 'step': 11975, 'epoch': 2} {'type': 'loss', 'content': 0.10852121561765671, 'timestamp': '2025-09-30 22:26:45.705103', 'step': 11976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.736395', 'step': 11976, 'epoch': 2} {'type': 'loss', 'content': 0.08028585463762283, 'timestamp': '2025-09-30 22:26:45.738997', 'step': 11977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.770187', 'step': 11977, 'epoch': 2} {'type': 'loss', 'content': 0.12845079600811005, 'timestamp': '2025-09-30 22:26:45.773055', 'step': 11978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:45.803319', 'step': 11978, 'epoch': 2} {'type': 'loss', 'content': 0.12011789530515671, 'timestamp': '2025-09-30 22:26:45.806746', 'step': 11979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:45.838791', 'step': 11979, 'epoch': 2} {'type': 'loss', 'content': 0.11359145492315292, 'timestamp': '2025-09-30 22:26:45.863401', 'step': 11980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:45.895166', 'step': 11980, 'epoch': 2} {'type': 'loss', 'content': 0.11134879291057587, 'timestamp': '2025-09-30 22:26:45.898889', 'step': 11981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.931208', 'step': 11981, 'epoch': 2} {'type': 'loss', 'content': 0.19340035319328308, 'timestamp': '2025-09-30 22:26:45.945050', 'step': 11982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:45.986611', 'step': 11982, 'epoch': 2} {'type': 'loss', 'content': 0.11118461936712265, 'timestamp': '2025-09-30 22:26:45.990231', 'step': 11983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:46.021784', 'step': 11983, 'epoch': 2} {'type': 'loss', 'content': 0.07566709816455841, 'timestamp': '2025-09-30 22:26:46.046460', 'step': 11984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:46.077718', 'step': 11984, 'epoch': 2} {'type': 'loss', 'content': 0.05481862276792526, 'timestamp': '2025-09-30 22:26:46.082694', 'step': 11985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:46.121819', 'step': 11985, 'epoch': 2} {'type': 'loss', 'content': 0.0989328920841217, 'timestamp': '2025-09-30 22:26:46.125015', 'step': 11986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:46.156324', 'step': 11986, 'epoch': 2} {'type': 'loss', 'content': 0.12362149357795715, 'timestamp': '2025-09-30 22:26:46.159380', 'step': 11987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:46.191889', 'step': 11987, 'epoch': 2} {'type': 'loss', 'content': 0.04910273477435112, 'timestamp': '2025-09-30 22:26:46.216617', 'step': 11988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:46.248543', 'step': 11988, 'epoch': 2} {'type': 'loss', 'content': 0.13846075534820557, 'timestamp': '2025-09-30 22:26:46.251515', 'step': 11989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:46.283081', 'step': 11989, 'epoch': 2} {'type': 'loss', 'content': 0.0924275740981102, 'timestamp': '2025-09-30 22:26:46.285761', 'step': 11990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:46.326286', 'step': 11990, 'epoch': 2} {'type': 'loss', 'content': 0.1184137836098671, 'timestamp': '2025-09-30 22:26:46.330112', 'step': 11991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:46.363587', 'step': 11991, 'epoch': 2} {'type': 'loss', 'content': 0.10234776884317398, 'timestamp': '2025-09-30 22:26:46.388798', 'step': 11992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:46.423609', 'step': 11992, 'epoch': 2} {'type': 'loss', 'content': 0.04936264827847481, 'timestamp': '2025-09-30 22:26:46.426272', 'step': 11993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:46.456713', 'step': 11993, 'epoch': 2} {'type': 'loss', 'content': 0.2444416880607605, 'timestamp': '2025-09-30 22:26:46.461117', 'step': 11994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:46.506759', 'step': 11994, 'epoch': 2} {'type': 'loss', 'content': 0.09234613925218582, 'timestamp': '2025-09-30 22:26:46.511514', 'step': 11995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:46.541370', 'step': 11995, 'epoch': 2} {'type': 'loss', 'content': 0.120734304189682, 'timestamp': '2025-09-30 22:26:46.566000', 'step': 11996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:46.596876', 'step': 11996, 'epoch': 2} {'type': 'loss', 'content': 0.10323987156152725, 'timestamp': '2025-09-30 22:26:46.601772', 'step': 11997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:46.634617', 'step': 11997, 'epoch': 2} {'type': 'loss', 'content': 0.17186538875102997, 'timestamp': '2025-09-30 22:26:46.640977', 'step': 11998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:46.677012', 'step': 11998, 'epoch': 2} {'type': 'loss', 'content': 0.07640958577394485, 'timestamp': '2025-09-30 22:26:46.681061', 'step': 11999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:46.714596', 'step': 11999, 'epoch': 2} {'type': 'loss', 'content': 0.09197857230901718, 'timestamp': '2025-09-30 22:26:46.741000', 'step': 12000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12000', 'timestamp': '2025-09-30 22:26:51.696934', 'step': 12000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:51.738802', 'step': 12000, 'epoch': 2} {'type': 'loss', 'content': 0.14931868016719818, 'timestamp': '2025-09-30 22:26:51.754254', 'step': 12001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:26:51.787534', 'step': 12001, 'epoch': 2} {'type': 'loss', 'content': 0.0589342936873436, 'timestamp': '2025-09-30 22:26:51.794009', 'step': 12002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:51.832891', 'step': 12002, 'epoch': 2} {'type': 'loss', 'content': 0.16990499198436737, 'timestamp': '2025-09-30 22:26:51.837550', 'step': 12003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:51.870028', 'step': 12003, 'epoch': 2} {'type': 'loss', 'content': 0.0460546500980854, 'timestamp': '2025-09-30 22:26:51.896347', 'step': 12004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:51.928019', 'step': 12004, 'epoch': 2} {'type': 'loss', 'content': 0.17566798627376556, 'timestamp': '2025-09-30 22:26:51.931068', 'step': 12005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:51.961587', 'step': 12005, 'epoch': 2} {'type': 'loss', 'content': 0.05237220600247383, 'timestamp': '2025-09-30 22:26:51.964951', 'step': 12006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:51.996268', 'step': 12006, 'epoch': 2} {'type': 'loss', 'content': 0.09027902036905289, 'timestamp': '2025-09-30 22:26:51.999036', 'step': 12007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.029534', 'step': 12007, 'epoch': 2} {'type': 'loss', 'content': 0.1163555458188057, 'timestamp': '2025-09-30 22:26:52.055035', 'step': 12008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:52.086852', 'step': 12008, 'epoch': 2} {'type': 'loss', 'content': 0.08547190576791763, 'timestamp': '2025-09-30 22:26:52.094459', 'step': 12009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:52.125159', 'step': 12009, 'epoch': 2} {'type': 'loss', 'content': 0.1526504009962082, 'timestamp': '2025-09-30 22:26:52.128930', 'step': 12010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.161435', 'step': 12010, 'epoch': 2} {'type': 'loss', 'content': 0.03696494176983833, 'timestamp': '2025-09-30 22:26:52.166152', 'step': 12011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:52.198389', 'step': 12011, 'epoch': 2} {'type': 'loss', 'content': 0.07795443385839462, 'timestamp': '2025-09-30 22:26:52.230136', 'step': 12012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.261399', 'step': 12012, 'epoch': 2} {'type': 'loss', 'content': 0.1045246571302414, 'timestamp': '2025-09-30 22:26:52.265914', 'step': 12013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.297657', 'step': 12013, 'epoch': 2} {'type': 'loss', 'content': 0.09882857650518417, 'timestamp': '2025-09-30 22:26:52.305128', 'step': 12014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.346722', 'step': 12014, 'epoch': 2} {'type': 'loss', 'content': 0.12615768611431122, 'timestamp': '2025-09-30 22:26:52.349493', 'step': 12015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:52.381168', 'step': 12015, 'epoch': 2} {'type': 'loss', 'content': 0.057417694479227066, 'timestamp': '2025-09-30 22:26:52.406393', 'step': 12016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.437308', 'step': 12016, 'epoch': 2} {'type': 'loss', 'content': 0.12711353600025177, 'timestamp': '2025-09-30 22:26:52.440341', 'step': 12017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:52.474030', 'step': 12017, 'epoch': 2} {'type': 'loss', 'content': 0.07217511534690857, 'timestamp': '2025-09-30 22:26:52.477457', 'step': 12018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:52.531950', 'step': 12018, 'epoch': 2} {'type': 'loss', 'content': 0.04605618491768837, 'timestamp': '2025-09-30 22:26:52.540995', 'step': 12019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.571659', 'step': 12019, 'epoch': 2} {'type': 'loss', 'content': 0.09585896134376526, 'timestamp': '2025-09-30 22:26:52.595691', 'step': 12020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.631059', 'step': 12020, 'epoch': 2} {'type': 'loss', 'content': 0.07227060198783875, 'timestamp': '2025-09-30 22:26:52.634401', 'step': 12021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.665642', 'step': 12021, 'epoch': 2} {'type': 'loss', 'content': 0.07277897745370865, 'timestamp': '2025-09-30 22:26:52.670708', 'step': 12022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.706495', 'step': 12022, 'epoch': 2} {'type': 'loss', 'content': 0.10410905629396439, 'timestamp': '2025-09-30 22:26:52.709052', 'step': 12023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.751805', 'step': 12023, 'epoch': 2} {'type': 'loss', 'content': 0.14963246881961823, 'timestamp': '2025-09-30 22:26:52.777731', 'step': 12024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.810443', 'step': 12024, 'epoch': 2} {'type': 'loss', 'content': 0.08704721927642822, 'timestamp': '2025-09-30 22:26:52.815286', 'step': 12025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:52.845860', 'step': 12025, 'epoch': 2} {'type': 'loss', 'content': 0.06857101619243622, 'timestamp': '2025-09-30 22:26:52.849928', 'step': 12026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.882383', 'step': 12026, 'epoch': 2} {'type': 'loss', 'content': 0.12266723811626434, 'timestamp': '2025-09-30 22:26:52.886929', 'step': 12027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.931163', 'step': 12027, 'epoch': 2} {'type': 'loss', 'content': 0.0720357596874237, 'timestamp': '2025-09-30 22:26:52.956371', 'step': 12028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:52.987723', 'step': 12028, 'epoch': 2} {'type': 'loss', 'content': 0.11414489150047302, 'timestamp': '2025-09-30 22:26:52.991450', 'step': 12029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:53.022898', 'step': 12029, 'epoch': 2} {'type': 'loss', 'content': 0.17069850862026215, 'timestamp': '2025-09-30 22:26:53.027254', 'step': 12030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.059779', 'step': 12030, 'epoch': 2} {'type': 'loss', 'content': 0.06680823117494583, 'timestamp': '2025-09-30 22:26:53.068304', 'step': 12031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:53.109838', 'step': 12031, 'epoch': 2} {'type': 'loss', 'content': 0.0885564312338829, 'timestamp': '2025-09-30 22:26:53.137283', 'step': 12032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:53.173572', 'step': 12032, 'epoch': 2} {'type': 'loss', 'content': 0.13067570328712463, 'timestamp': '2025-09-30 22:26:53.178838', 'step': 12033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:53.217358', 'step': 12033, 'epoch': 2} {'type': 'loss', 'content': 0.06600968539714813, 'timestamp': '2025-09-30 22:26:53.220048', 'step': 12034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.254361', 'step': 12034, 'epoch': 2} {'type': 'loss', 'content': 0.11257005482912064, 'timestamp': '2025-09-30 22:26:53.269883', 'step': 12035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.313049', 'step': 12035, 'epoch': 2} {'type': 'loss', 'content': 0.05978758633136749, 'timestamp': '2025-09-30 22:26:53.347808', 'step': 12036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:53.389840', 'step': 12036, 'epoch': 2} {'type': 'loss', 'content': 0.08198069781064987, 'timestamp': '2025-09-30 22:26:53.400426', 'step': 12037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:53.432893', 'step': 12037, 'epoch': 2} {'type': 'loss', 'content': 0.09375140070915222, 'timestamp': '2025-09-30 22:26:53.443455', 'step': 12038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:53.478077', 'step': 12038, 'epoch': 2} {'type': 'loss', 'content': 0.04612550511956215, 'timestamp': '2025-09-30 22:26:53.485271', 'step': 12039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.519307', 'step': 12039, 'epoch': 2} {'type': 'loss', 'content': 0.060238663107156754, 'timestamp': '2025-09-30 22:26:53.552332', 'step': 12040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:53.591042', 'step': 12040, 'epoch': 2} {'type': 'loss', 'content': 0.05954888463020325, 'timestamp': '2025-09-30 22:26:53.594853', 'step': 12041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:53.627271', 'step': 12041, 'epoch': 2} {'type': 'loss', 'content': 0.12653608620166779, 'timestamp': '2025-09-30 22:26:53.639661', 'step': 12042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:53.670920', 'step': 12042, 'epoch': 2} {'type': 'loss', 'content': 0.14649224281311035, 'timestamp': '2025-09-30 22:26:53.680391', 'step': 12043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:53.719659', 'step': 12043, 'epoch': 2} {'type': 'loss', 'content': 0.035484541207551956, 'timestamp': '2025-09-30 22:26:53.744649', 'step': 12044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.783209', 'step': 12044, 'epoch': 2} {'type': 'loss', 'content': 0.16448509693145752, 'timestamp': '2025-09-30 22:26:53.789230', 'step': 12045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.825880', 'step': 12045, 'epoch': 2} {'type': 'loss', 'content': 0.06663595139980316, 'timestamp': '2025-09-30 22:26:53.830335', 'step': 12046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:53.867635', 'step': 12046, 'epoch': 2} {'type': 'loss', 'content': 0.07235902547836304, 'timestamp': '2025-09-30 22:26:53.878331', 'step': 12047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:53.923724', 'step': 12047, 'epoch': 2} {'type': 'loss', 'content': 0.11420412361621857, 'timestamp': '2025-09-30 22:26:53.950206', 'step': 12048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:53.992428', 'step': 12048, 'epoch': 2} {'type': 'loss', 'content': 0.12512843310832977, 'timestamp': '2025-09-30 22:26:53.997491', 'step': 12049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:54.029426', 'step': 12049, 'epoch': 2} {'type': 'loss', 'content': 0.08632513135671616, 'timestamp': '2025-09-30 22:26:54.032131', 'step': 12050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:54.063797', 'step': 12050, 'epoch': 2} {'type': 'loss', 'content': 0.045633137226104736, 'timestamp': '2025-09-30 22:26:54.067978', 'step': 12051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:26:54.107275', 'step': 12051, 'epoch': 2} {'type': 'loss', 'content': 0.07994643598794937, 'timestamp': '2025-09-30 22:26:54.134026', 'step': 12052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.167273', 'step': 12052, 'epoch': 2} {'type': 'loss', 'content': 0.06530781090259552, 'timestamp': '2025-09-30 22:26:54.171460', 'step': 12053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:54.205033', 'step': 12053, 'epoch': 2} {'type': 'loss', 'content': 0.18240371346473694, 'timestamp': '2025-09-30 22:26:54.212446', 'step': 12054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:54.251163', 'step': 12054, 'epoch': 2} {'type': 'loss', 'content': 0.11809352040290833, 'timestamp': '2025-09-30 22:26:54.260890', 'step': 12055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:54.292006', 'step': 12055, 'epoch': 2} {'type': 'loss', 'content': 0.08087325841188431, 'timestamp': '2025-09-30 22:26:54.316734', 'step': 12056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:54.350690', 'step': 12056, 'epoch': 2} {'type': 'loss', 'content': 0.2070578783750534, 'timestamp': '2025-09-30 22:26:54.353874', 'step': 12057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:54.388696', 'step': 12057, 'epoch': 2} {'type': 'loss', 'content': 0.07006809115409851, 'timestamp': '2025-09-30 22:26:54.402439', 'step': 12058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:54.436512', 'step': 12058, 'epoch': 2} {'type': 'loss', 'content': 0.12795519828796387, 'timestamp': '2025-09-30 22:26:54.444469', 'step': 12059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:54.478936', 'step': 12059, 'epoch': 2} {'type': 'loss', 'content': 0.14935944974422455, 'timestamp': '2025-09-30 22:26:54.503724', 'step': 12060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.535401', 'step': 12060, 'epoch': 2} {'type': 'loss', 'content': 0.12516193091869354, 'timestamp': '2025-09-30 22:26:54.539252', 'step': 12061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:54.577896', 'step': 12061, 'epoch': 2} {'type': 'loss', 'content': 0.06930924206972122, 'timestamp': '2025-09-30 22:26:54.582460', 'step': 12062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:54.622323', 'step': 12062, 'epoch': 2} {'type': 'loss', 'content': 0.13118532299995422, 'timestamp': '2025-09-30 22:26:54.626105', 'step': 12063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:54.657576', 'step': 12063, 'epoch': 2} {'type': 'loss', 'content': 0.11064150184392929, 'timestamp': '2025-09-30 22:26:54.682722', 'step': 12064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.713399', 'step': 12064, 'epoch': 2} {'type': 'loss', 'content': 0.10148127377033234, 'timestamp': '2025-09-30 22:26:54.717653', 'step': 12065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.748274', 'step': 12065, 'epoch': 2} {'type': 'loss', 'content': 0.10621052980422974, 'timestamp': '2025-09-30 22:26:54.763229', 'step': 12066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:54.802006', 'step': 12066, 'epoch': 2} {'type': 'loss', 'content': 0.060908470302820206, 'timestamp': '2025-09-30 22:26:54.807399', 'step': 12067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.848300', 'step': 12067, 'epoch': 2} {'type': 'loss', 'content': 0.07858961075544357, 'timestamp': '2025-09-30 22:26:54.873257', 'step': 12068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.906162', 'step': 12068, 'epoch': 2} {'type': 'loss', 'content': 0.09083757549524307, 'timestamp': '2025-09-30 22:26:54.910348', 'step': 12069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:54.944170', 'step': 12069, 'epoch': 2} {'type': 'loss', 'content': 0.10597674548625946, 'timestamp': '2025-09-30 22:26:54.949224', 'step': 12070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:54.982965', 'step': 12070, 'epoch': 2} {'type': 'loss', 'content': 0.127485454082489, 'timestamp': '2025-09-30 22:26:54.993825', 'step': 12071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.025016', 'step': 12071, 'epoch': 2} {'type': 'loss', 'content': 0.09305162727832794, 'timestamp': '2025-09-30 22:26:55.049947', 'step': 12072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.082712', 'step': 12072, 'epoch': 2} {'type': 'loss', 'content': 0.22224360704421997, 'timestamp': '2025-09-30 22:26:55.087131', 'step': 12073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.118439', 'step': 12073, 'epoch': 2} {'type': 'loss', 'content': 0.10085224360227585, 'timestamp': '2025-09-30 22:26:55.121704', 'step': 12074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.154154', 'step': 12074, 'epoch': 2} {'type': 'loss', 'content': 0.07240750640630722, 'timestamp': '2025-09-30 22:26:55.159445', 'step': 12075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:55.190894', 'step': 12075, 'epoch': 2} {'type': 'loss', 'content': 0.06868457794189453, 'timestamp': '2025-09-30 22:26:55.218309', 'step': 12076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.250802', 'step': 12076, 'epoch': 2} {'type': 'loss', 'content': 0.06322827190160751, 'timestamp': '2025-09-30 22:26:55.255803', 'step': 12077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:55.288279', 'step': 12077, 'epoch': 2} {'type': 'loss', 'content': 0.04243583232164383, 'timestamp': '2025-09-30 22:26:55.290957', 'step': 12078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.322753', 'step': 12078, 'epoch': 2} {'type': 'loss', 'content': 0.09705285727977753, 'timestamp': '2025-09-30 22:26:55.326051', 'step': 12079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.357982', 'step': 12079, 'epoch': 2} {'type': 'loss', 'content': 0.21656657755374908, 'timestamp': '2025-09-30 22:26:55.392705', 'step': 12080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.425449', 'step': 12080, 'epoch': 2} {'type': 'loss', 'content': 0.0872931033372879, 'timestamp': '2025-09-30 22:26:55.443012', 'step': 12081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:55.479993', 'step': 12081, 'epoch': 2} {'type': 'loss', 'content': 0.09264575690031052, 'timestamp': '2025-09-30 22:26:55.483793', 'step': 12082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.515934', 'step': 12082, 'epoch': 2} {'type': 'loss', 'content': 0.06859247386455536, 'timestamp': '2025-09-30 22:26:55.526936', 'step': 12083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:55.558094', 'step': 12083, 'epoch': 2} {'type': 'loss', 'content': 0.07560259103775024, 'timestamp': '2025-09-30 22:26:55.583184', 'step': 12084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.623279', 'step': 12084, 'epoch': 2} {'type': 'loss', 'content': 0.23914870619773865, 'timestamp': '2025-09-30 22:26:55.626042', 'step': 12085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:55.666368', 'step': 12085, 'epoch': 2} {'type': 'loss', 'content': 0.1583031564950943, 'timestamp': '2025-09-30 22:26:55.670422', 'step': 12086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:26:55.702726', 'step': 12086, 'epoch': 2} {'type': 'loss', 'content': 0.12562888860702515, 'timestamp': '2025-09-30 22:26:55.705732', 'step': 12087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.737265', 'step': 12087, 'epoch': 2} {'type': 'loss', 'content': 0.05826064571738243, 'timestamp': '2025-09-30 22:26:55.762123', 'step': 12088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.800582', 'step': 12088, 'epoch': 2} {'type': 'loss', 'content': 0.1470491588115692, 'timestamp': '2025-09-30 22:26:55.803876', 'step': 12089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:55.835919', 'step': 12089, 'epoch': 2} {'type': 'loss', 'content': 0.14521433413028717, 'timestamp': '2025-09-30 22:26:55.839023', 'step': 12090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.870679', 'step': 12090, 'epoch': 2} {'type': 'loss', 'content': 0.0671226903796196, 'timestamp': '2025-09-30 22:26:55.884297', 'step': 12091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:55.924159', 'step': 12091, 'epoch': 2} {'type': 'loss', 'content': 0.09619931876659393, 'timestamp': '2025-09-30 22:26:55.949981', 'step': 12092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:55.985572', 'step': 12092, 'epoch': 2} {'type': 'loss', 'content': 0.15231049060821533, 'timestamp': '2025-09-30 22:26:55.989666', 'step': 12093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.022588', 'step': 12093, 'epoch': 2} {'type': 'loss', 'content': 0.08119035512208939, 'timestamp': '2025-09-30 22:26:56.039019', 'step': 12094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:56.071541', 'step': 12094, 'epoch': 2} {'type': 'loss', 'content': 0.080471932888031, 'timestamp': '2025-09-30 22:26:56.074251', 'step': 12095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:56.106271', 'step': 12095, 'epoch': 2} {'type': 'loss', 'content': 0.04914925992488861, 'timestamp': '2025-09-30 22:26:56.136988', 'step': 12096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:56.184997', 'step': 12096, 'epoch': 2} {'type': 'loss', 'content': 0.16919755935668945, 'timestamp': '2025-09-30 22:26:56.193241', 'step': 12097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.226086', 'step': 12097, 'epoch': 2} {'type': 'loss', 'content': 0.1428999900817871, 'timestamp': '2025-09-30 22:26:56.238236', 'step': 12098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.281724', 'step': 12098, 'epoch': 2} {'type': 'loss', 'content': 0.2616962790489197, 'timestamp': '2025-09-30 22:26:56.298157', 'step': 12099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:56.337467', 'step': 12099, 'epoch': 2} {'type': 'loss', 'content': 0.16351845860481262, 'timestamp': '2025-09-30 22:26:56.369891', 'step': 12100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.402535', 'step': 12100, 'epoch': 2} {'type': 'loss', 'content': 0.1144203171133995, 'timestamp': '2025-09-30 22:26:56.406751', 'step': 12101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:26:56.440883', 'step': 12101, 'epoch': 2} {'type': 'loss', 'content': 0.10432344675064087, 'timestamp': '2025-09-30 22:26:56.456235', 'step': 12102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.502882', 'step': 12102, 'epoch': 2} {'type': 'loss', 'content': 0.038224782794713974, 'timestamp': '2025-09-30 22:26:56.514308', 'step': 12103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:56.555101', 'step': 12103, 'epoch': 2} {'type': 'loss', 'content': 0.1494142860174179, 'timestamp': '2025-09-30 22:26:56.580835', 'step': 12104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:56.616709', 'step': 12104, 'epoch': 2} {'type': 'loss', 'content': 0.10710068047046661, 'timestamp': '2025-09-30 22:26:56.621832', 'step': 12105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:56.654503', 'step': 12105, 'epoch': 2} {'type': 'loss', 'content': 0.07291055470705032, 'timestamp': '2025-09-30 22:26:56.658541', 'step': 12106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.691527', 'step': 12106, 'epoch': 2} {'type': 'loss', 'content': 0.1366015374660492, 'timestamp': '2025-09-30 22:26:56.697119', 'step': 12107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:56.729444', 'step': 12107, 'epoch': 2} {'type': 'loss', 'content': 0.07567506283521652, 'timestamp': '2025-09-30 22:26:56.755515', 'step': 12108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:26:56.787882', 'step': 12108, 'epoch': 2} {'type': 'loss', 'content': 0.06980759650468826, 'timestamp': '2025-09-30 22:26:56.792863', 'step': 12109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:56.824933', 'step': 12109, 'epoch': 2} {'type': 'loss', 'content': 0.11126773804426193, 'timestamp': '2025-09-30 22:26:56.829044', 'step': 12110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:56.869019', 'step': 12110, 'epoch': 2} {'type': 'loss', 'content': 0.2035580426454544, 'timestamp': '2025-09-30 22:26:56.876098', 'step': 12111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:26:56.911178', 'step': 12111, 'epoch': 2} {'type': 'loss', 'content': 0.040832486003637314, 'timestamp': '2025-09-30 22:26:56.943746', 'step': 12112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:56.975284', 'step': 12112, 'epoch': 2} {'type': 'loss', 'content': 0.08480677753686905, 'timestamp': '2025-09-30 22:26:56.980475', 'step': 12113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:26:57.014379', 'step': 12113, 'epoch': 2} {'type': 'loss', 'content': 0.23488841950893402, 'timestamp': '2025-09-30 22:26:57.024374', 'step': 12114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:57.064131', 'step': 12114, 'epoch': 2} {'type': 'loss', 'content': 0.1202017217874527, 'timestamp': '2025-09-30 22:26:57.068107', 'step': 12115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:26:57.098678', 'step': 12115, 'epoch': 2} {'type': 'loss', 'content': 0.060641951858997345, 'timestamp': '2025-09-30 22:26:57.123868', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:27:05.544474', 'step': 12116, 'epoch': 2} {'type': 'pplx', 'content': 11276.816413664767, 'timestamp': '2025-09-30 22:27:05.548733', 'step': 12116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:05.588052', 'step': 12116, 'epoch': 2} {'type': 'loss', 'content': 0.08616387099027634, 'timestamp': '2025-09-30 22:27:05.595639', 'step': 12117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:05.635262', 'step': 12117, 'epoch': 2} {'type': 'loss', 'content': 0.10999356955289841, 'timestamp': '2025-09-30 22:27:05.640284', 'step': 12118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:05.683987', 'step': 12118, 'epoch': 2} {'type': 'loss', 'content': 0.0863037109375, 'timestamp': '2025-09-30 22:27:05.688112', 'step': 12119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:05.733987', 'step': 12119, 'epoch': 2} {'type': 'loss', 'content': 0.12840643525123596, 'timestamp': '2025-09-30 22:27:05.759464', 'step': 12120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:05.800990', 'step': 12120, 'epoch': 2} {'type': 'loss', 'content': 0.0687183365225792, 'timestamp': '2025-09-30 22:27:05.808013', 'step': 12121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:05.840588', 'step': 12121, 'epoch': 2} {'type': 'loss', 'content': 0.09226933866739273, 'timestamp': '2025-09-30 22:27:05.844635', 'step': 12122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:05.877314', 'step': 12122, 'epoch': 2} {'type': 'loss', 'content': 0.09886950999498367, 'timestamp': '2025-09-30 22:27:05.880382', 'step': 12123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:05.928266', 'step': 12123, 'epoch': 2} {'type': 'loss', 'content': 0.09798435121774673, 'timestamp': '2025-09-30 22:27:05.953692', 'step': 12124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:05.991849', 'step': 12124, 'epoch': 2} {'type': 'loss', 'content': 0.0701754167675972, 'timestamp': '2025-09-30 22:27:06.002621', 'step': 12125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.039965', 'step': 12125, 'epoch': 2} {'type': 'loss', 'content': 0.10618441551923752, 'timestamp': '2025-09-30 22:27:06.047109', 'step': 12126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:06.095162', 'step': 12126, 'epoch': 2} {'type': 'loss', 'content': 0.08128374069929123, 'timestamp': '2025-09-30 22:27:06.100893', 'step': 12127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.137078', 'step': 12127, 'epoch': 2} {'type': 'loss', 'content': 0.15522147715091705, 'timestamp': '2025-09-30 22:27:06.163063', 'step': 12128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:06.196724', 'step': 12128, 'epoch': 2} {'type': 'loss', 'content': 0.07493430376052856, 'timestamp': '2025-09-30 22:27:06.200489', 'step': 12129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:06.234436', 'step': 12129, 'epoch': 2} {'type': 'loss', 'content': 0.14034338295459747, 'timestamp': '2025-09-30 22:27:06.240952', 'step': 12130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.284596', 'step': 12130, 'epoch': 2} {'type': 'loss', 'content': 0.14480292797088623, 'timestamp': '2025-09-30 22:27:06.295951', 'step': 12131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.328585', 'step': 12131, 'epoch': 2} {'type': 'loss', 'content': 0.18287979066371918, 'timestamp': '2025-09-30 22:27:06.353456', 'step': 12132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:06.389568', 'step': 12132, 'epoch': 2} {'type': 'loss', 'content': 0.1291290670633316, 'timestamp': '2025-09-30 22:27:06.399147', 'step': 12133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:06.455171', 'step': 12133, 'epoch': 2} {'type': 'loss', 'content': 0.1334664523601532, 'timestamp': '2025-09-30 22:27:06.458649', 'step': 12134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:06.490340', 'step': 12134, 'epoch': 2} {'type': 'loss', 'content': 0.12017609179019928, 'timestamp': '2025-09-30 22:27:06.494537', 'step': 12135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.534320', 'step': 12135, 'epoch': 2} {'type': 'loss', 'content': 0.12272389233112335, 'timestamp': '2025-09-30 22:27:06.563953', 'step': 12136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:06.600268', 'step': 12136, 'epoch': 2} {'type': 'loss', 'content': 0.11985771358013153, 'timestamp': '2025-09-30 22:27:06.610166', 'step': 12137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.649603', 'step': 12137, 'epoch': 2} {'type': 'loss', 'content': 0.0993577241897583, 'timestamp': '2025-09-30 22:27:06.652863', 'step': 12138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.701504', 'step': 12138, 'epoch': 2} {'type': 'loss', 'content': 0.16064178943634033, 'timestamp': '2025-09-30 22:27:06.707997', 'step': 12139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:06.739533', 'step': 12139, 'epoch': 2} {'type': 'loss', 'content': 0.09297668188810349, 'timestamp': '2025-09-30 22:27:06.764797', 'step': 12140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:06.801550', 'step': 12140, 'epoch': 2} {'type': 'loss', 'content': 0.0812552347779274, 'timestamp': '2025-09-30 22:27:06.808346', 'step': 12141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:06.842073', 'step': 12141, 'epoch': 2} {'type': 'loss', 'content': 0.15024513006210327, 'timestamp': '2025-09-30 22:27:06.855539', 'step': 12142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:06.889272', 'step': 12142, 'epoch': 2} {'type': 'loss', 'content': 0.03256155550479889, 'timestamp': '2025-09-30 22:27:06.893238', 'step': 12143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:06.927257', 'step': 12143, 'epoch': 2} {'type': 'loss', 'content': 0.14000624418258667, 'timestamp': '2025-09-30 22:27:06.953647', 'step': 12144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:06.985908', 'step': 12144, 'epoch': 2} {'type': 'loss', 'content': 0.06912912428379059, 'timestamp': '2025-09-30 22:27:06.989993', 'step': 12145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.021662', 'step': 12145, 'epoch': 2} {'type': 'loss', 'content': 0.16843949258327484, 'timestamp': '2025-09-30 22:27:07.026786', 'step': 12146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:07.057743', 'step': 12146, 'epoch': 2} {'type': 'loss', 'content': 0.0993928536772728, 'timestamp': '2025-09-30 22:27:07.065001', 'step': 12147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.098568', 'step': 12147, 'epoch': 2} {'type': 'loss', 'content': 0.0781906321644783, 'timestamp': '2025-09-30 22:27:07.122874', 'step': 12148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.154155', 'step': 12148, 'epoch': 2} {'type': 'loss', 'content': 0.0440782755613327, 'timestamp': '2025-09-30 22:27:07.157623', 'step': 12149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:07.190268', 'step': 12149, 'epoch': 2} {'type': 'loss', 'content': 0.07774613797664642, 'timestamp': '2025-09-30 22:27:07.193364', 'step': 12150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:07.224223', 'step': 12150, 'epoch': 2} {'type': 'loss', 'content': 0.12376824766397476, 'timestamp': '2025-09-30 22:27:07.237664', 'step': 12151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:07.278326', 'step': 12151, 'epoch': 2} {'type': 'loss', 'content': 0.05126895755529404, 'timestamp': '2025-09-30 22:27:07.304012', 'step': 12152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:07.335954', 'step': 12152, 'epoch': 2} {'type': 'loss', 'content': 0.07546467334032059, 'timestamp': '2025-09-30 22:27:07.348674', 'step': 12153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.388448', 'step': 12153, 'epoch': 2} {'type': 'loss', 'content': 0.13003703951835632, 'timestamp': '2025-09-30 22:27:07.392672', 'step': 12154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.424766', 'step': 12154, 'epoch': 2} {'type': 'loss', 'content': 0.06710687279701233, 'timestamp': '2025-09-30 22:27:07.429320', 'step': 12155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:07.467485', 'step': 12155, 'epoch': 2} {'type': 'loss', 'content': 0.08772306144237518, 'timestamp': '2025-09-30 22:27:07.493217', 'step': 12156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:07.540396', 'step': 12156, 'epoch': 2} {'type': 'loss', 'content': 0.06350207328796387, 'timestamp': '2025-09-30 22:27:07.546927', 'step': 12157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:07.590834', 'step': 12157, 'epoch': 2} {'type': 'loss', 'content': 0.19001518189907074, 'timestamp': '2025-09-30 22:27:07.595388', 'step': 12158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:07.628414', 'step': 12158, 'epoch': 2} {'type': 'loss', 'content': 0.09264811873435974, 'timestamp': '2025-09-30 22:27:07.635493', 'step': 12159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.667170', 'step': 12159, 'epoch': 2} {'type': 'loss', 'content': 0.22765997052192688, 'timestamp': '2025-09-30 22:27:07.694930', 'step': 12160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:07.730095', 'step': 12160, 'epoch': 2} {'type': 'loss', 'content': 0.10560062527656555, 'timestamp': '2025-09-30 22:27:07.734970', 'step': 12161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.788288', 'step': 12161, 'epoch': 2} {'type': 'loss', 'content': 0.0511641688644886, 'timestamp': '2025-09-30 22:27:07.793509', 'step': 12162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:07.831709', 'step': 12162, 'epoch': 2} {'type': 'loss', 'content': 0.06635203957557678, 'timestamp': '2025-09-30 22:27:07.836699', 'step': 12163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:07.869755', 'step': 12163, 'epoch': 2} {'type': 'loss', 'content': 0.15005026757717133, 'timestamp': '2025-09-30 22:27:07.895441', 'step': 12164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:07.927167', 'step': 12164, 'epoch': 2} {'type': 'loss', 'content': 0.07611490041017532, 'timestamp': '2025-09-30 22:27:07.938007', 'step': 12165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:07.969770', 'step': 12165, 'epoch': 2} {'type': 'loss', 'content': 0.09644909203052521, 'timestamp': '2025-09-30 22:27:07.973225', 'step': 12166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.006182', 'step': 12166, 'epoch': 2} {'type': 'loss', 'content': 0.07922425121068954, 'timestamp': '2025-09-30 22:27:08.011497', 'step': 12167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:08.045707', 'step': 12167, 'epoch': 2} {'type': 'loss', 'content': 0.10446787625551224, 'timestamp': '2025-09-30 22:27:08.070451', 'step': 12168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:08.109383', 'step': 12168, 'epoch': 2} {'type': 'loss', 'content': 0.053215865045785904, 'timestamp': '2025-09-30 22:27:08.114044', 'step': 12169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.147028', 'step': 12169, 'epoch': 2} {'type': 'loss', 'content': 0.10820614546537399, 'timestamp': '2025-09-30 22:27:08.151448', 'step': 12170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:08.185357', 'step': 12170, 'epoch': 2} {'type': 'loss', 'content': 0.12919725477695465, 'timestamp': '2025-09-30 22:27:08.190930', 'step': 12171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.225178', 'step': 12171, 'epoch': 2} {'type': 'loss', 'content': 0.12882079184055328, 'timestamp': '2025-09-30 22:27:08.263901', 'step': 12172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.296810', 'step': 12172, 'epoch': 2} {'type': 'loss', 'content': 0.16925173997879028, 'timestamp': '2025-09-30 22:27:08.301255', 'step': 12173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:08.341495', 'step': 12173, 'epoch': 2} {'type': 'loss', 'content': 0.32046011090278625, 'timestamp': '2025-09-30 22:27:08.346818', 'step': 12174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:08.397851', 'step': 12174, 'epoch': 2} {'type': 'loss', 'content': 0.19911330938339233, 'timestamp': '2025-09-30 22:27:08.402467', 'step': 12175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.437169', 'step': 12175, 'epoch': 2} {'type': 'loss', 'content': 0.160600945353508, 'timestamp': '2025-09-30 22:27:08.463472', 'step': 12176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.497293', 'step': 12176, 'epoch': 2} {'type': 'loss', 'content': 0.13215553760528564, 'timestamp': '2025-09-30 22:27:08.501585', 'step': 12177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.532924', 'step': 12177, 'epoch': 2} {'type': 'loss', 'content': 0.10617067664861679, 'timestamp': '2025-09-30 22:27:08.544702', 'step': 12178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:08.584642', 'step': 12178, 'epoch': 2} {'type': 'loss', 'content': 0.14737002551555634, 'timestamp': '2025-09-30 22:27:08.588698', 'step': 12179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:08.620745', 'step': 12179, 'epoch': 2} {'type': 'loss', 'content': 0.02623072639107704, 'timestamp': '2025-09-30 22:27:08.646487', 'step': 12180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.678650', 'step': 12180, 'epoch': 2} {'type': 'loss', 'content': 0.18370357155799866, 'timestamp': '2025-09-30 22:27:08.688694', 'step': 12181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:08.729056', 'step': 12181, 'epoch': 2} {'type': 'loss', 'content': 0.07640828937292099, 'timestamp': '2025-09-30 22:27:08.733950', 'step': 12182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.765170', 'step': 12182, 'epoch': 2} {'type': 'loss', 'content': 0.05383412167429924, 'timestamp': '2025-09-30 22:27:08.768426', 'step': 12183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:08.810121', 'step': 12183, 'epoch': 2} {'type': 'loss', 'content': 0.035891078412532806, 'timestamp': '2025-09-30 22:27:08.836505', 'step': 12184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.868923', 'step': 12184, 'epoch': 2} {'type': 'loss', 'content': 0.05412901192903519, 'timestamp': '2025-09-30 22:27:08.877627', 'step': 12185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:08.908681', 'step': 12185, 'epoch': 2} {'type': 'loss', 'content': 0.25452691316604614, 'timestamp': '2025-09-30 22:27:08.919560', 'step': 12186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:08.953366', 'step': 12186, 'epoch': 2} {'type': 'loss', 'content': 0.18276053667068481, 'timestamp': '2025-09-30 22:27:08.963900', 'step': 12187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:08.996120', 'step': 12187, 'epoch': 2} {'type': 'loss', 'content': 0.1950879842042923, 'timestamp': '2025-09-30 22:27:09.022794', 'step': 12188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.055335', 'step': 12188, 'epoch': 2} {'type': 'loss', 'content': 0.03946472704410553, 'timestamp': '2025-09-30 22:27:09.059940', 'step': 12189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.090333', 'step': 12189, 'epoch': 2} {'type': 'loss', 'content': 0.026486696675419807, 'timestamp': '2025-09-30 22:27:09.094230', 'step': 12190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.131975', 'step': 12190, 'epoch': 2} {'type': 'loss', 'content': 0.10842468589544296, 'timestamp': '2025-09-30 22:27:09.135236', 'step': 12191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:09.170733', 'step': 12191, 'epoch': 2} {'type': 'loss', 'content': 0.10690681636333466, 'timestamp': '2025-09-30 22:27:09.201917', 'step': 12192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:09.232946', 'step': 12192, 'epoch': 2} {'type': 'loss', 'content': 0.10719560831785202, 'timestamp': '2025-09-30 22:27:09.236405', 'step': 12193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.274578', 'step': 12193, 'epoch': 2} {'type': 'loss', 'content': 0.0849684402346611, 'timestamp': '2025-09-30 22:27:09.278642', 'step': 12194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:09.318920', 'step': 12194, 'epoch': 2} {'type': 'loss', 'content': 0.2142847776412964, 'timestamp': '2025-09-30 22:27:09.321708', 'step': 12195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.353325', 'step': 12195, 'epoch': 2} {'type': 'loss', 'content': 0.09808381646871567, 'timestamp': '2025-09-30 22:27:09.377376', 'step': 12196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.407174', 'step': 12196, 'epoch': 2} {'type': 'loss', 'content': 0.042942531406879425, 'timestamp': '2025-09-30 22:27:09.420356', 'step': 12197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:09.457121', 'step': 12197, 'epoch': 2} {'type': 'loss', 'content': 0.11049969494342804, 'timestamp': '2025-09-30 22:27:09.461636', 'step': 12198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.492319', 'step': 12198, 'epoch': 2} {'type': 'loss', 'content': 0.0683932900428772, 'timestamp': '2025-09-30 22:27:09.495339', 'step': 12199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:09.526153', 'step': 12199, 'epoch': 2} {'type': 'loss', 'content': 0.0637449249625206, 'timestamp': '2025-09-30 22:27:09.551832', 'step': 12200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:09.583512', 'step': 12200, 'epoch': 2} {'type': 'loss', 'content': 0.14242538809776306, 'timestamp': '2025-09-30 22:27:09.588356', 'step': 12201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.623201', 'step': 12201, 'epoch': 2} {'type': 'loss', 'content': 0.05459744483232498, 'timestamp': '2025-09-30 22:27:09.626846', 'step': 12202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.658513', 'step': 12202, 'epoch': 2} {'type': 'loss', 'content': 0.12864544987678528, 'timestamp': '2025-09-30 22:27:09.662857', 'step': 12203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:09.694936', 'step': 12203, 'epoch': 2} {'type': 'loss', 'content': 0.06309843808412552, 'timestamp': '2025-09-30 22:27:09.730811', 'step': 12204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:09.763639', 'step': 12204, 'epoch': 2} {'type': 'loss', 'content': 0.07326260954141617, 'timestamp': '2025-09-30 22:27:09.767503', 'step': 12205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.798330', 'step': 12205, 'epoch': 2} {'type': 'loss', 'content': 0.20826400816440582, 'timestamp': '2025-09-30 22:27:09.802180', 'step': 12206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.842097', 'step': 12206, 'epoch': 2} {'type': 'loss', 'content': 0.10934945940971375, 'timestamp': '2025-09-30 22:27:09.847928', 'step': 12207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:09.890431', 'step': 12207, 'epoch': 2} {'type': 'loss', 'content': 0.06915687769651413, 'timestamp': '2025-09-30 22:27:09.915885', 'step': 12208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:09.948021', 'step': 12208, 'epoch': 2} {'type': 'loss', 'content': 0.1264926642179489, 'timestamp': '2025-09-30 22:27:09.953510', 'step': 12209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:09.986331', 'step': 12209, 'epoch': 2} {'type': 'loss', 'content': 0.05300460383296013, 'timestamp': '2025-09-30 22:27:09.991438', 'step': 12210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:10.023516', 'step': 12210, 'epoch': 2} {'type': 'loss', 'content': 0.15700040757656097, 'timestamp': '2025-09-30 22:27:10.028465', 'step': 12211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:10.059378', 'step': 12211, 'epoch': 2} {'type': 'loss', 'content': 0.11987323313951492, 'timestamp': '2025-09-30 22:27:10.083650', 'step': 12212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.119105', 'step': 12212, 'epoch': 2} {'type': 'loss', 'content': 0.16998189687728882, 'timestamp': '2025-09-30 22:27:10.123563', 'step': 12213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.155727', 'step': 12213, 'epoch': 2} {'type': 'loss', 'content': 0.15699826180934906, 'timestamp': '2025-09-30 22:27:10.161464', 'step': 12214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.194544', 'step': 12214, 'epoch': 2} {'type': 'loss', 'content': 0.14042384922504425, 'timestamp': '2025-09-30 22:27:10.208627', 'step': 12215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.247164', 'step': 12215, 'epoch': 2} {'type': 'loss', 'content': 0.14236240088939667, 'timestamp': '2025-09-30 22:27:10.272612', 'step': 12216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.310492', 'step': 12216, 'epoch': 2} {'type': 'loss', 'content': 0.13584940135478973, 'timestamp': '2025-09-30 22:27:10.313308', 'step': 12217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:10.345204', 'step': 12217, 'epoch': 2} {'type': 'loss', 'content': 0.10625556111335754, 'timestamp': '2025-09-30 22:27:10.353081', 'step': 12218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:10.391153', 'step': 12218, 'epoch': 2} {'type': 'loss', 'content': 0.1688871830701828, 'timestamp': '2025-09-30 22:27:10.394750', 'step': 12219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.456452', 'step': 12219, 'epoch': 2} {'type': 'loss', 'content': 0.08327073603868484, 'timestamp': '2025-09-30 22:27:10.481770', 'step': 12220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:10.515121', 'step': 12220, 'epoch': 2} {'type': 'loss', 'content': 0.15022383630275726, 'timestamp': '2025-09-30 22:27:10.517960', 'step': 12221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:10.549812', 'step': 12221, 'epoch': 2} {'type': 'loss', 'content': 0.0399007573723793, 'timestamp': '2025-09-30 22:27:10.563629', 'step': 12222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:10.600741', 'step': 12222, 'epoch': 2} {'type': 'loss', 'content': 0.029533375054597855, 'timestamp': '2025-09-30 22:27:10.605647', 'step': 12223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:10.645598', 'step': 12223, 'epoch': 2} {'type': 'loss', 'content': 0.10491176694631577, 'timestamp': '2025-09-30 22:27:10.671904', 'step': 12224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:10.706827', 'step': 12224, 'epoch': 2} {'type': 'loss', 'content': 0.10700157284736633, 'timestamp': '2025-09-30 22:27:10.710498', 'step': 12225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:10.742584', 'step': 12225, 'epoch': 2} {'type': 'loss', 'content': 0.26014602184295654, 'timestamp': '2025-09-30 22:27:10.755242', 'step': 12226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:10.797525', 'step': 12226, 'epoch': 2} {'type': 'loss', 'content': 0.07716551423072815, 'timestamp': '2025-09-30 22:27:10.807913', 'step': 12227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:10.845282', 'step': 12227, 'epoch': 2} {'type': 'loss', 'content': 0.17036527395248413, 'timestamp': '2025-09-30 22:27:10.872168', 'step': 12228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.906666', 'step': 12228, 'epoch': 2} {'type': 'loss', 'content': 0.09376783668994904, 'timestamp': '2025-09-30 22:27:10.913411', 'step': 12229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:10.951186', 'step': 12229, 'epoch': 2} {'type': 'loss', 'content': 0.14258533716201782, 'timestamp': '2025-09-30 22:27:10.955663', 'step': 12230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:10.988668', 'step': 12230, 'epoch': 2} {'type': 'loss', 'content': 0.08447577804327011, 'timestamp': '2025-09-30 22:27:10.992796', 'step': 12231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.023913', 'step': 12231, 'epoch': 2} {'type': 'loss', 'content': 0.03767016902565956, 'timestamp': '2025-09-30 22:27:11.058389', 'step': 12232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.091352', 'step': 12232, 'epoch': 2} {'type': 'loss', 'content': 0.16759471595287323, 'timestamp': '2025-09-30 22:27:11.107423', 'step': 12233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:11.140363', 'step': 12233, 'epoch': 2} {'type': 'loss', 'content': 0.1459716409444809, 'timestamp': '2025-09-30 22:27:11.150911', 'step': 12234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:11.186910', 'step': 12234, 'epoch': 2} {'type': 'loss', 'content': 0.16209790110588074, 'timestamp': '2025-09-30 22:27:11.189529', 'step': 12235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.226598', 'step': 12235, 'epoch': 2} {'type': 'loss', 'content': 0.056426484137773514, 'timestamp': '2025-09-30 22:27:11.252469', 'step': 12236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:11.284949', 'step': 12236, 'epoch': 2} {'type': 'loss', 'content': 0.1040155440568924, 'timestamp': '2025-09-30 22:27:11.303355', 'step': 12237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:11.342463', 'step': 12237, 'epoch': 2} {'type': 'loss', 'content': 0.05507056042551994, 'timestamp': '2025-09-30 22:27:11.352431', 'step': 12238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.389328', 'step': 12238, 'epoch': 2} {'type': 'loss', 'content': 0.106412373483181, 'timestamp': '2025-09-30 22:27:11.393863', 'step': 12239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:11.424880', 'step': 12239, 'epoch': 2} {'type': 'loss', 'content': 0.07936111837625504, 'timestamp': '2025-09-30 22:27:11.449804', 'step': 12240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:11.482104', 'step': 12240, 'epoch': 2} {'type': 'loss', 'content': 0.18533918261528015, 'timestamp': '2025-09-30 22:27:11.485646', 'step': 12241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:11.522121', 'step': 12241, 'epoch': 2} {'type': 'loss', 'content': 0.17229773104190826, 'timestamp': '2025-09-30 22:27:11.525627', 'step': 12242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:11.557435', 'step': 12242, 'epoch': 2} {'type': 'loss', 'content': 0.06364267319440842, 'timestamp': '2025-09-30 22:27:11.562093', 'step': 12243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.593600', 'step': 12243, 'epoch': 2} {'type': 'loss', 'content': 0.23841848969459534, 'timestamp': '2025-09-30 22:27:11.625603', 'step': 12244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:11.656977', 'step': 12244, 'epoch': 2} {'type': 'loss', 'content': 0.08580359816551208, 'timestamp': '2025-09-30 22:27:11.664293', 'step': 12245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.695912', 'step': 12245, 'epoch': 2} {'type': 'loss', 'content': 0.07758218795061111, 'timestamp': '2025-09-30 22:27:11.703406', 'step': 12246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.737931', 'step': 12246, 'epoch': 2} {'type': 'loss', 'content': 0.060306549072265625, 'timestamp': '2025-09-30 22:27:11.743758', 'step': 12247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.787589', 'step': 12247, 'epoch': 2} {'type': 'loss', 'content': 0.060047853738069534, 'timestamp': '2025-09-30 22:27:11.813306', 'step': 12248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:11.851071', 'step': 12248, 'epoch': 2} {'type': 'loss', 'content': 0.12114766240119934, 'timestamp': '2025-09-30 22:27:11.856767', 'step': 12249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.889857', 'step': 12249, 'epoch': 2} {'type': 'loss', 'content': 0.12197251617908478, 'timestamp': '2025-09-30 22:27:11.894046', 'step': 12250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:11.925105', 'step': 12250, 'epoch': 2} {'type': 'loss', 'content': 0.08311127871274948, 'timestamp': '2025-09-30 22:27:11.939504', 'step': 12251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:11.977507', 'step': 12251, 'epoch': 2} {'type': 'loss', 'content': 0.20900283753871918, 'timestamp': '2025-09-30 22:27:12.009109', 'step': 12252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:12.043275', 'step': 12252, 'epoch': 2} {'type': 'loss', 'content': 0.18756181001663208, 'timestamp': '2025-09-30 22:27:12.049551', 'step': 12253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.083045', 'step': 12253, 'epoch': 2} {'type': 'loss', 'content': 0.09304561465978622, 'timestamp': '2025-09-30 22:27:12.098454', 'step': 12254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:12.132242', 'step': 12254, 'epoch': 2} {'type': 'loss', 'content': 0.10593703389167786, 'timestamp': '2025-09-30 22:27:12.147112', 'step': 12255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.189590', 'step': 12255, 'epoch': 2} {'type': 'loss', 'content': 0.08644875884056091, 'timestamp': '2025-09-30 22:27:12.215813', 'step': 12256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:12.253769', 'step': 12256, 'epoch': 2} {'type': 'loss', 'content': 0.11881648749113083, 'timestamp': '2025-09-30 22:27:12.257550', 'step': 12257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.301647', 'step': 12257, 'epoch': 2} {'type': 'loss', 'content': 0.1409202516078949, 'timestamp': '2025-09-30 22:27:12.307136', 'step': 12258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:12.340176', 'step': 12258, 'epoch': 2} {'type': 'loss', 'content': 0.08248214423656464, 'timestamp': '2025-09-30 22:27:12.343506', 'step': 12259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:12.384666', 'step': 12259, 'epoch': 2} {'type': 'loss', 'content': 0.07783923298120499, 'timestamp': '2025-09-30 22:27:12.417537', 'step': 12260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:12.451825', 'step': 12260, 'epoch': 2} {'type': 'loss', 'content': 0.07154569774866104, 'timestamp': '2025-09-30 22:27:12.457460', 'step': 12261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.489392', 'step': 12261, 'epoch': 2} {'type': 'loss', 'content': 0.11554783582687378, 'timestamp': '2025-09-30 22:27:12.498246', 'step': 12262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.539107', 'step': 12262, 'epoch': 2} {'type': 'loss', 'content': 0.13549910485744476, 'timestamp': '2025-09-30 22:27:12.552051', 'step': 12263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:12.588523', 'step': 12263, 'epoch': 2} {'type': 'loss', 'content': 0.12979578971862793, 'timestamp': '2025-09-30 22:27:12.620711', 'step': 12264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.661276', 'step': 12264, 'epoch': 2} {'type': 'loss', 'content': 0.24103312194347382, 'timestamp': '2025-09-30 22:27:12.664440', 'step': 12265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:12.697902', 'step': 12265, 'epoch': 2} {'type': 'loss', 'content': 0.1251421719789505, 'timestamp': '2025-09-30 22:27:12.710867', 'step': 12266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:12.742134', 'step': 12266, 'epoch': 2} {'type': 'loss', 'content': 0.047617170959711075, 'timestamp': '2025-09-30 22:27:12.746889', 'step': 12267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:12.778945', 'step': 12267, 'epoch': 2} {'type': 'loss', 'content': 0.11721837520599365, 'timestamp': '2025-09-30 22:27:12.803100', 'step': 12268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:12.833397', 'step': 12268, 'epoch': 2} {'type': 'loss', 'content': 0.0819772407412529, 'timestamp': '2025-09-30 22:27:12.837751', 'step': 12269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:12.869502', 'step': 12269, 'epoch': 2} {'type': 'loss', 'content': 0.14803168177604675, 'timestamp': '2025-09-30 22:27:12.875125', 'step': 12270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:12.908008', 'step': 12270, 'epoch': 2} {'type': 'loss', 'content': 0.1374937742948532, 'timestamp': '2025-09-30 22:27:12.913252', 'step': 12271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:12.945271', 'step': 12271, 'epoch': 2} {'type': 'loss', 'content': 0.12935365736484528, 'timestamp': '2025-09-30 22:27:12.971921', 'step': 12272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.005709', 'step': 12272, 'epoch': 2} {'type': 'loss', 'content': 0.13819023966789246, 'timestamp': '2025-09-30 22:27:13.010574', 'step': 12273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.042737', 'step': 12273, 'epoch': 2} {'type': 'loss', 'content': 0.11783410608768463, 'timestamp': '2025-09-30 22:27:13.047273', 'step': 12274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.083234', 'step': 12274, 'epoch': 2} {'type': 'loss', 'content': 0.06526833027601242, 'timestamp': '2025-09-30 22:27:13.087410', 'step': 12275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.118169', 'step': 12275, 'epoch': 2} {'type': 'loss', 'content': 0.07408209145069122, 'timestamp': '2025-09-30 22:27:13.145605', 'step': 12276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.181895', 'step': 12276, 'epoch': 2} {'type': 'loss', 'content': 0.07674148678779602, 'timestamp': '2025-09-30 22:27:13.187031', 'step': 12277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.220612', 'step': 12277, 'epoch': 2} {'type': 'loss', 'content': 0.09060300886631012, 'timestamp': '2025-09-30 22:27:13.223887', 'step': 12278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.255228', 'step': 12278, 'epoch': 2} {'type': 'loss', 'content': 0.05270737037062645, 'timestamp': '2025-09-30 22:27:13.263706', 'step': 12279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:13.299216', 'step': 12279, 'epoch': 2} {'type': 'loss', 'content': 0.08570792526006699, 'timestamp': '2025-09-30 22:27:13.323977', 'step': 12280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.355307', 'step': 12280, 'epoch': 2} {'type': 'loss', 'content': 0.03497854247689247, 'timestamp': '2025-09-30 22:27:13.358106', 'step': 12281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.390057', 'step': 12281, 'epoch': 2} {'type': 'loss', 'content': 0.15573161840438843, 'timestamp': '2025-09-30 22:27:13.392373', 'step': 12282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:13.428271', 'step': 12282, 'epoch': 2} {'type': 'loss', 'content': 0.013171263970434666, 'timestamp': '2025-09-30 22:27:13.430924', 'step': 12283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:13.461936', 'step': 12283, 'epoch': 2} {'type': 'loss', 'content': 0.04303521662950516, 'timestamp': '2025-09-30 22:27:13.490385', 'step': 12284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.520680', 'step': 12284, 'epoch': 2} {'type': 'loss', 'content': 0.053419169038534164, 'timestamp': '2025-09-30 22:27:13.528916', 'step': 12285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.565623', 'step': 12285, 'epoch': 2} {'type': 'loss', 'content': 0.08326038718223572, 'timestamp': '2025-09-30 22:27:13.571541', 'step': 12286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.607618', 'step': 12286, 'epoch': 2} {'type': 'loss', 'content': 0.01976991817355156, 'timestamp': '2025-09-30 22:27:13.611552', 'step': 12287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:13.647348', 'step': 12287, 'epoch': 2} {'type': 'loss', 'content': 0.0851769745349884, 'timestamp': '2025-09-30 22:27:13.682512', 'step': 12288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:13.720159', 'step': 12288, 'epoch': 2} {'type': 'loss', 'content': 0.07102429121732712, 'timestamp': '2025-09-30 22:27:13.729344', 'step': 12289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.761362', 'step': 12289, 'epoch': 2} {'type': 'loss', 'content': 0.08408939093351364, 'timestamp': '2025-09-30 22:27:13.766173', 'step': 12290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:13.815545', 'step': 12290, 'epoch': 2} {'type': 'loss', 'content': 0.12022826075553894, 'timestamp': '2025-09-30 22:27:13.819592', 'step': 12291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:13.854925', 'step': 12291, 'epoch': 2} {'type': 'loss', 'content': 0.1386602520942688, 'timestamp': '2025-09-30 22:27:13.889008', 'step': 12292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:13.930192', 'step': 12292, 'epoch': 2} {'type': 'loss', 'content': 0.07417608797550201, 'timestamp': '2025-09-30 22:27:13.948492', 'step': 12293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:13.980672', 'step': 12293, 'epoch': 2} {'type': 'loss', 'content': 0.06732619553804398, 'timestamp': '2025-09-30 22:27:13.988028', 'step': 12294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:14.022723', 'step': 12294, 'epoch': 2} {'type': 'loss', 'content': 0.04602671414613724, 'timestamp': '2025-09-30 22:27:14.026626', 'step': 12295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:14.059042', 'step': 12295, 'epoch': 2} {'type': 'loss', 'content': 0.10458006709814072, 'timestamp': '2025-09-30 22:27:14.100546', 'step': 12296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:14.138712', 'step': 12296, 'epoch': 2} {'type': 'loss', 'content': 0.03994761034846306, 'timestamp': '2025-09-30 22:27:14.150174', 'step': 12297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:14.182415', 'step': 12297, 'epoch': 2} {'type': 'loss', 'content': 0.11698220670223236, 'timestamp': '2025-09-30 22:27:14.186484', 'step': 12298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:14.220677', 'step': 12298, 'epoch': 2} {'type': 'loss', 'content': 0.16521798074245453, 'timestamp': '2025-09-30 22:27:14.234636', 'step': 12299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:14.267768', 'step': 12299, 'epoch': 2} {'type': 'loss', 'content': 0.0863303542137146, 'timestamp': '2025-09-30 22:27:14.294084', 'step': 12300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:14.330082', 'step': 12300, 'epoch': 2} {'type': 'loss', 'content': 0.09970013052225113, 'timestamp': '2025-09-30 22:27:14.333814', 'step': 12301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:14.370826', 'step': 12301, 'epoch': 2} {'type': 'loss', 'content': 0.10628493875265121, 'timestamp': '2025-09-30 22:27:14.375322', 'step': 12302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:14.408019', 'step': 12302, 'epoch': 2} {'type': 'loss', 'content': 0.14804071187973022, 'timestamp': '2025-09-30 22:27:14.413954', 'step': 12303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:14.458483', 'step': 12303, 'epoch': 2} {'type': 'loss', 'content': 0.06599899381399155, 'timestamp': '2025-09-30 22:27:14.489715', 'step': 12304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:14.520398', 'step': 12304, 'epoch': 2} {'type': 'loss', 'content': 0.13264316320419312, 'timestamp': '2025-09-30 22:27:14.526410', 'step': 12305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:14.567651', 'step': 12305, 'epoch': 2} {'type': 'loss', 'content': 0.058069609105587006, 'timestamp': '2025-09-30 22:27:14.571297', 'step': 12306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:14.605350', 'step': 12306, 'epoch': 2} {'type': 'loss', 'content': 0.11002685874700546, 'timestamp': '2025-09-30 22:27:14.609284', 'step': 12307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:14.645223', 'step': 12307, 'epoch': 2} {'type': 'loss', 'content': 0.09546630084514618, 'timestamp': '2025-09-30 22:27:14.669896', 'step': 12308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:14.714115', 'step': 12308, 'epoch': 2} {'type': 'loss', 'content': 0.23083090782165527, 'timestamp': '2025-09-30 22:27:14.716654', 'step': 12309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:14.759606', 'step': 12309, 'epoch': 2} {'type': 'loss', 'content': 0.12430816143751144, 'timestamp': '2025-09-30 22:27:14.769520', 'step': 12310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:14.803550', 'step': 12310, 'epoch': 2} {'type': 'loss', 'content': 0.08665765076875687, 'timestamp': '2025-09-30 22:27:14.822355', 'step': 12311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:14.859743', 'step': 12311, 'epoch': 2} {'type': 'loss', 'content': 0.12211836874485016, 'timestamp': '2025-09-30 22:27:14.889102', 'step': 12312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:14.920932', 'step': 12312, 'epoch': 2} {'type': 'loss', 'content': 0.04192202165722847, 'timestamp': '2025-09-30 22:27:14.926557', 'step': 12313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:14.967173', 'step': 12313, 'epoch': 2} {'type': 'loss', 'content': 0.0867067351937294, 'timestamp': '2025-09-30 22:27:14.971354', 'step': 12314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.003028', 'step': 12314, 'epoch': 2} {'type': 'loss', 'content': 0.08725111931562424, 'timestamp': '2025-09-30 22:27:15.008900', 'step': 12315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:15.040998', 'step': 12315, 'epoch': 2} {'type': 'loss', 'content': 0.10632846504449844, 'timestamp': '2025-09-30 22:27:15.072273', 'step': 12316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:15.117101', 'step': 12316, 'epoch': 2} {'type': 'loss', 'content': 0.04289033263921738, 'timestamp': '2025-09-30 22:27:15.120080', 'step': 12317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:15.153269', 'step': 12317, 'epoch': 2} {'type': 'loss', 'content': 0.22254465520381927, 'timestamp': '2025-09-30 22:27:15.164522', 'step': 12318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.196909', 'step': 12318, 'epoch': 2} {'type': 'loss', 'content': 0.13554184138774872, 'timestamp': '2025-09-30 22:27:15.211484', 'step': 12319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:15.250521', 'step': 12319, 'epoch': 2} {'type': 'loss', 'content': 0.07414495199918747, 'timestamp': '2025-09-30 22:27:15.275348', 'step': 12320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.321733', 'step': 12320, 'epoch': 2} {'type': 'loss', 'content': 0.13427090644836426, 'timestamp': '2025-09-30 22:27:15.325394', 'step': 12321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:15.362558', 'step': 12321, 'epoch': 2} {'type': 'loss', 'content': 0.12059392780065536, 'timestamp': '2025-09-30 22:27:15.365829', 'step': 12322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:15.397711', 'step': 12322, 'epoch': 2} {'type': 'loss', 'content': 0.14675386250019073, 'timestamp': '2025-09-30 22:27:15.412770', 'step': 12323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:15.443979', 'step': 12323, 'epoch': 2} {'type': 'loss', 'content': 0.06101706251502037, 'timestamp': '2025-09-30 22:27:15.477348', 'step': 12324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:15.509036', 'step': 12324, 'epoch': 2} {'type': 'loss', 'content': 0.12940619885921478, 'timestamp': '2025-09-30 22:27:15.511568', 'step': 12325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:27:15.549000', 'step': 12325, 'epoch': 2} {'type': 'loss', 'content': 0.1551484763622284, 'timestamp': '2025-09-30 22:27:15.553381', 'step': 12326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.589569', 'step': 12326, 'epoch': 2} {'type': 'loss', 'content': 0.11861617118120193, 'timestamp': '2025-09-30 22:27:15.592606', 'step': 12327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:15.628826', 'step': 12327, 'epoch': 2} {'type': 'loss', 'content': 0.14288346469402313, 'timestamp': '2025-09-30 22:27:15.653123', 'step': 12328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:15.685777', 'step': 12328, 'epoch': 2} {'type': 'loss', 'content': 0.2482737898826599, 'timestamp': '2025-09-30 22:27:15.689011', 'step': 12329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:15.720631', 'step': 12329, 'epoch': 2} {'type': 'loss', 'content': 0.13555879890918732, 'timestamp': '2025-09-30 22:27:15.723760', 'step': 12330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:15.755958', 'step': 12330, 'epoch': 2} {'type': 'loss', 'content': 0.07135861366987228, 'timestamp': '2025-09-30 22:27:15.759841', 'step': 12331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.797422', 'step': 12331, 'epoch': 2} {'type': 'loss', 'content': 0.0773783028125763, 'timestamp': '2025-09-30 22:27:15.826761', 'step': 12332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.857179', 'step': 12332, 'epoch': 2} {'type': 'loss', 'content': 0.06303557753562927, 'timestamp': '2025-09-30 22:27:15.860356', 'step': 12333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.891778', 'step': 12333, 'epoch': 2} {'type': 'loss', 'content': 0.09447244554758072, 'timestamp': '2025-09-30 22:27:15.894309', 'step': 12334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:15.930553', 'step': 12334, 'epoch': 2} {'type': 'loss', 'content': 0.12187211960554123, 'timestamp': '2025-09-30 22:27:15.933302', 'step': 12335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:15.965550', 'step': 12335, 'epoch': 2} {'type': 'loss', 'content': 0.11111271381378174, 'timestamp': '2025-09-30 22:27:15.990083', 'step': 12336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:16.021827', 'step': 12336, 'epoch': 2} {'type': 'loss', 'content': 0.13628633320331573, 'timestamp': '2025-09-30 22:27:16.033818', 'step': 12337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:16.065331', 'step': 12337, 'epoch': 2} {'type': 'loss', 'content': 0.0831809937953949, 'timestamp': '2025-09-30 22:27:16.076276', 'step': 12338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.114336', 'step': 12338, 'epoch': 2} {'type': 'loss', 'content': 0.18252640962600708, 'timestamp': '2025-09-30 22:27:16.117547', 'step': 12339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.156696', 'step': 12339, 'epoch': 2} {'type': 'loss', 'content': 0.12659597396850586, 'timestamp': '2025-09-30 22:27:16.181347', 'step': 12340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.221202', 'step': 12340, 'epoch': 2} {'type': 'loss', 'content': 0.16214042901992798, 'timestamp': '2025-09-30 22:27:16.224565', 'step': 12341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.280660', 'step': 12341, 'epoch': 2} {'type': 'loss', 'content': 0.11752761900424957, 'timestamp': '2025-09-30 22:27:16.283820', 'step': 12342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:16.317715', 'step': 12342, 'epoch': 2} {'type': 'loss', 'content': 0.0806007832288742, 'timestamp': '2025-09-30 22:27:16.324856', 'step': 12343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:16.365358', 'step': 12343, 'epoch': 2} {'type': 'loss', 'content': 0.12012618035078049, 'timestamp': '2025-09-30 22:27:16.389748', 'step': 12344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:16.423456', 'step': 12344, 'epoch': 2} {'type': 'loss', 'content': 0.08162742853164673, 'timestamp': '2025-09-30 22:27:16.429187', 'step': 12345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.478029', 'step': 12345, 'epoch': 2} {'type': 'loss', 'content': 0.1362784206867218, 'timestamp': '2025-09-30 22:27:16.481459', 'step': 12346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.512101', 'step': 12346, 'epoch': 2} {'type': 'loss', 'content': 0.1065380796790123, 'timestamp': '2025-09-30 22:27:16.516719', 'step': 12347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.551305', 'step': 12347, 'epoch': 2} {'type': 'loss', 'content': 0.07685323059558868, 'timestamp': '2025-09-30 22:27:16.575851', 'step': 12348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.611626', 'step': 12348, 'epoch': 2} {'type': 'loss', 'content': 0.15025104582309723, 'timestamp': '2025-09-30 22:27:16.615357', 'step': 12349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.649826', 'step': 12349, 'epoch': 2} {'type': 'loss', 'content': 0.12707845866680145, 'timestamp': '2025-09-30 22:27:16.652253', 'step': 12350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:16.683153', 'step': 12350, 'epoch': 2} {'type': 'loss', 'content': 0.14099076390266418, 'timestamp': '2025-09-30 22:27:16.691360', 'step': 12351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.723803', 'step': 12351, 'epoch': 2} {'type': 'loss', 'content': 0.07954396307468414, 'timestamp': '2025-09-30 22:27:16.752145', 'step': 12352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.782359', 'step': 12352, 'epoch': 2} {'type': 'loss', 'content': 0.18596190214157104, 'timestamp': '2025-09-30 22:27:16.790422', 'step': 12353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:16.822486', 'step': 12353, 'epoch': 2} {'type': 'loss', 'content': 0.1483834981918335, 'timestamp': '2025-09-30 22:27:16.827683', 'step': 12354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:16.859869', 'step': 12354, 'epoch': 2} {'type': 'loss', 'content': 0.16081050038337708, 'timestamp': '2025-09-30 22:27:16.863242', 'step': 12355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.895460', 'step': 12355, 'epoch': 2} {'type': 'loss', 'content': 0.06548832356929779, 'timestamp': '2025-09-30 22:27:16.919735', 'step': 12356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:16.950571', 'step': 12356, 'epoch': 2} {'type': 'loss', 'content': 0.11465799808502197, 'timestamp': '2025-09-30 22:27:16.956357', 'step': 12357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:16.987960', 'step': 12357, 'epoch': 2} {'type': 'loss', 'content': 0.06265709549188614, 'timestamp': '2025-09-30 22:27:16.990766', 'step': 12358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:17.033071', 'step': 12358, 'epoch': 2} {'type': 'loss', 'content': 0.14605477452278137, 'timestamp': '2025-09-30 22:27:17.035960', 'step': 12359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:17.068861', 'step': 12359, 'epoch': 2} {'type': 'loss', 'content': 0.12624427676200867, 'timestamp': '2025-09-30 22:27:17.093454', 'step': 12360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:17.124657', 'step': 12360, 'epoch': 2} {'type': 'loss', 'content': 0.08226923644542694, 'timestamp': '2025-09-30 22:27:17.130107', 'step': 12361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:17.172429', 'step': 12361, 'epoch': 2} {'type': 'loss', 'content': 0.12704052031040192, 'timestamp': '2025-09-30 22:27:17.175303', 'step': 12362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:17.206084', 'step': 12362, 'epoch': 2} {'type': 'loss', 'content': 0.12429165840148926, 'timestamp': '2025-09-30 22:27:17.211660', 'step': 12363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.245064', 'step': 12363, 'epoch': 2} {'type': 'loss', 'content': 0.07082926481962204, 'timestamp': '2025-09-30 22:27:17.271115', 'step': 12364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.305406', 'step': 12364, 'epoch': 2} {'type': 'loss', 'content': 0.09896846860647202, 'timestamp': '2025-09-30 22:27:17.308047', 'step': 12365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:17.339758', 'step': 12365, 'epoch': 2} {'type': 'loss', 'content': 0.1016484722495079, 'timestamp': '2025-09-30 22:27:17.342846', 'step': 12366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.378330', 'step': 12366, 'epoch': 2} {'type': 'loss', 'content': 0.1041586771607399, 'timestamp': '2025-09-30 22:27:17.383686', 'step': 12367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:17.414594', 'step': 12367, 'epoch': 2} {'type': 'loss', 'content': 0.112056203186512, 'timestamp': '2025-09-30 22:27:17.439223', 'step': 12368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:17.469182', 'step': 12368, 'epoch': 2} {'type': 'loss', 'content': 0.09183009713888168, 'timestamp': '2025-09-30 22:27:17.472154', 'step': 12369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:17.503411', 'step': 12369, 'epoch': 2} {'type': 'loss', 'content': 0.09879250824451447, 'timestamp': '2025-09-30 22:27:17.509257', 'step': 12370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.548391', 'step': 12370, 'epoch': 2} {'type': 'loss', 'content': 0.1393628567457199, 'timestamp': '2025-09-30 22:27:17.552874', 'step': 12371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.585937', 'step': 12371, 'epoch': 2} {'type': 'loss', 'content': 0.12664325535297394, 'timestamp': '2025-09-30 22:27:17.610097', 'step': 12372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:17.641721', 'step': 12372, 'epoch': 2} {'type': 'loss', 'content': 0.09206545352935791, 'timestamp': '2025-09-30 22:27:17.647203', 'step': 12373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.689670', 'step': 12373, 'epoch': 2} {'type': 'loss', 'content': 0.1877680867910385, 'timestamp': '2025-09-30 22:27:17.695288', 'step': 12374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:17.729407', 'step': 12374, 'epoch': 2} {'type': 'loss', 'content': 0.15124721825122833, 'timestamp': '2025-09-30 22:27:17.734647', 'step': 12375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.768088', 'step': 12375, 'epoch': 2} {'type': 'loss', 'content': 0.09851442277431488, 'timestamp': '2025-09-30 22:27:17.795218', 'step': 12376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.831869', 'step': 12376, 'epoch': 2} {'type': 'loss', 'content': 0.13885866105556488, 'timestamp': '2025-09-30 22:27:17.842365', 'step': 12377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.877262', 'step': 12377, 'epoch': 2} {'type': 'loss', 'content': 0.07733698189258575, 'timestamp': '2025-09-30 22:27:17.880256', 'step': 12378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:17.914449', 'step': 12378, 'epoch': 2} {'type': 'loss', 'content': 0.09304478764533997, 'timestamp': '2025-09-30 22:27:17.919510', 'step': 12379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:17.951787', 'step': 12379, 'epoch': 2} {'type': 'loss', 'content': 0.08783352375030518, 'timestamp': '2025-09-30 22:27:17.976194', 'step': 12380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:18.007552', 'step': 12380, 'epoch': 2} {'type': 'loss', 'content': 0.10162113606929779, 'timestamp': '2025-09-30 22:27:18.010024', 'step': 12381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.040205', 'step': 12381, 'epoch': 2} {'type': 'loss', 'content': 0.087894007563591, 'timestamp': '2025-09-30 22:27:18.045023', 'step': 12382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.082990', 'step': 12382, 'epoch': 2} {'type': 'loss', 'content': 0.1451847404241562, 'timestamp': '2025-09-30 22:27:18.088018', 'step': 12383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.124688', 'step': 12383, 'epoch': 2} {'type': 'loss', 'content': 0.07589457184076309, 'timestamp': '2025-09-30 22:27:18.150518', 'step': 12384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.180697', 'step': 12384, 'epoch': 2} {'type': 'loss', 'content': 0.1251622438430786, 'timestamp': '2025-09-30 22:27:18.183398', 'step': 12385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.214188', 'step': 12385, 'epoch': 2} {'type': 'loss', 'content': 0.12031713128089905, 'timestamp': '2025-09-30 22:27:18.219167', 'step': 12386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:18.251458', 'step': 12386, 'epoch': 2} {'type': 'loss', 'content': 0.21147871017456055, 'timestamp': '2025-09-30 22:27:18.254142', 'step': 12387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.285431', 'step': 12387, 'epoch': 2} {'type': 'loss', 'content': 0.1215095967054367, 'timestamp': '2025-09-30 22:27:18.318346', 'step': 12388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.356512', 'step': 12388, 'epoch': 2} {'type': 'loss', 'content': 0.12063400447368622, 'timestamp': '2025-09-30 22:27:18.363947', 'step': 12389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:18.396228', 'step': 12389, 'epoch': 2} {'type': 'loss', 'content': 0.12668371200561523, 'timestamp': '2025-09-30 22:27:18.398655', 'step': 12390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.428676', 'step': 12390, 'epoch': 2} {'type': 'loss', 'content': 0.07561302185058594, 'timestamp': '2025-09-30 22:27:18.433386', 'step': 12391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.464050', 'step': 12391, 'epoch': 2} {'type': 'loss', 'content': 0.12865598499774933, 'timestamp': '2025-09-30 22:27:18.489112', 'step': 12392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.522291', 'step': 12392, 'epoch': 2} {'type': 'loss', 'content': 0.03367527946829796, 'timestamp': '2025-09-30 22:27:18.524550', 'step': 12393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.555262', 'step': 12393, 'epoch': 2} {'type': 'loss', 'content': 0.1682150959968567, 'timestamp': '2025-09-30 22:27:18.557588', 'step': 12394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:18.594367', 'step': 12394, 'epoch': 2} {'type': 'loss', 'content': 0.09196600317955017, 'timestamp': '2025-09-30 22:27:18.598259', 'step': 12395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:18.630387', 'step': 12395, 'epoch': 2} {'type': 'loss', 'content': 0.13688525557518005, 'timestamp': '2025-09-30 22:27:18.654233', 'step': 12396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:18.686002', 'step': 12396, 'epoch': 2} {'type': 'loss', 'content': 0.1291601061820984, 'timestamp': '2025-09-30 22:27:18.691865', 'step': 12397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.728183', 'step': 12397, 'epoch': 2} {'type': 'loss', 'content': 0.14760518074035645, 'timestamp': '2025-09-30 22:27:18.730821', 'step': 12398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.762175', 'step': 12398, 'epoch': 2} {'type': 'loss', 'content': 0.10161911696195602, 'timestamp': '2025-09-30 22:27:18.765206', 'step': 12399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.795453', 'step': 12399, 'epoch': 2} {'type': 'loss', 'content': 0.0631766989827156, 'timestamp': '2025-09-30 22:27:18.819729', 'step': 12400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.850846', 'step': 12400, 'epoch': 2} {'type': 'loss', 'content': 0.1056557223200798, 'timestamp': '2025-09-30 22:27:18.853520', 'step': 12401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:18.885179', 'step': 12401, 'epoch': 2} {'type': 'loss', 'content': 0.1806117296218872, 'timestamp': '2025-09-30 22:27:18.888781', 'step': 12402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:18.920964', 'step': 12402, 'epoch': 2} {'type': 'loss', 'content': 0.050359826534986496, 'timestamp': '2025-09-30 22:27:18.926675', 'step': 12403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:18.959925', 'step': 12403, 'epoch': 2} {'type': 'loss', 'content': 0.23440125584602356, 'timestamp': '2025-09-30 22:27:18.985022', 'step': 12404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:19.015799', 'step': 12404, 'epoch': 2} {'type': 'loss', 'content': 0.12353316694498062, 'timestamp': '2025-09-30 22:27:19.018613', 'step': 12405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:19.049085', 'step': 12405, 'epoch': 2} {'type': 'loss', 'content': 0.22178828716278076, 'timestamp': '2025-09-30 22:27:19.051981', 'step': 12406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:19.083449', 'step': 12406, 'epoch': 2} {'type': 'loss', 'content': 0.14980697631835938, 'timestamp': '2025-09-30 22:27:19.085977', 'step': 12407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.119029', 'step': 12407, 'epoch': 2} {'type': 'loss', 'content': 0.09068138897418976, 'timestamp': '2025-09-30 22:27:19.142995', 'step': 12408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:19.172827', 'step': 12408, 'epoch': 2} {'type': 'loss', 'content': 0.1308327168226242, 'timestamp': '2025-09-30 22:27:19.178232', 'step': 12409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:19.209039', 'step': 12409, 'epoch': 2} {'type': 'loss', 'content': 0.05551443621516228, 'timestamp': '2025-09-30 22:27:19.211499', 'step': 12410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:19.243978', 'step': 12410, 'epoch': 2} {'type': 'loss', 'content': 0.06656483560800552, 'timestamp': '2025-09-30 22:27:19.247727', 'step': 12411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:19.279866', 'step': 12411, 'epoch': 2} {'type': 'loss', 'content': 0.11042916029691696, 'timestamp': '2025-09-30 22:27:19.303533', 'step': 12412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.347569', 'step': 12412, 'epoch': 2} {'type': 'loss', 'content': 0.13689324259757996, 'timestamp': '2025-09-30 22:27:19.350269', 'step': 12413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:19.380928', 'step': 12413, 'epoch': 2} {'type': 'loss', 'content': 0.09194059669971466, 'timestamp': '2025-09-30 22:27:19.384033', 'step': 12414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:19.416333', 'step': 12414, 'epoch': 2} {'type': 'loss', 'content': 0.1019614040851593, 'timestamp': '2025-09-30 22:27:19.424442', 'step': 12415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.461520', 'step': 12415, 'epoch': 2} {'type': 'loss', 'content': 0.11727926135063171, 'timestamp': '2025-09-30 22:27:19.485749', 'step': 12416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:19.523907', 'step': 12416, 'epoch': 2} {'type': 'loss', 'content': 0.10565826296806335, 'timestamp': '2025-09-30 22:27:19.527179', 'step': 12417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:19.562623', 'step': 12417, 'epoch': 2} {'type': 'loss', 'content': 0.09829459339380264, 'timestamp': '2025-09-30 22:27:19.566254', 'step': 12418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:19.597677', 'step': 12418, 'epoch': 2} {'type': 'loss', 'content': 0.1304323375225067, 'timestamp': '2025-09-30 22:27:19.604044', 'step': 12419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:19.636465', 'step': 12419, 'epoch': 2} {'type': 'loss', 'content': 0.13024687767028809, 'timestamp': '2025-09-30 22:27:19.660504', 'step': 12420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:19.692175', 'step': 12420, 'epoch': 2} {'type': 'loss', 'content': 0.09223064035177231, 'timestamp': '2025-09-30 22:27:19.698150', 'step': 12421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.732551', 'step': 12421, 'epoch': 2} {'type': 'loss', 'content': 0.14312054216861725, 'timestamp': '2025-09-30 22:27:19.735271', 'step': 12422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:19.769565', 'step': 12422, 'epoch': 2} {'type': 'loss', 'content': 0.06997254490852356, 'timestamp': '2025-09-30 22:27:19.771863', 'step': 12423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.803478', 'step': 12423, 'epoch': 2} {'type': 'loss', 'content': 0.10240376740694046, 'timestamp': '2025-09-30 22:27:19.828073', 'step': 12424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.861105', 'step': 12424, 'epoch': 2} {'type': 'loss', 'content': 0.09548433870077133, 'timestamp': '2025-09-30 22:27:19.863469', 'step': 12425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:19.894081', 'step': 12425, 'epoch': 2} {'type': 'loss', 'content': 0.12326885014772415, 'timestamp': '2025-09-30 22:27:19.897615', 'step': 12426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:19.927990', 'step': 12426, 'epoch': 2} {'type': 'loss', 'content': 0.13456958532333374, 'timestamp': '2025-09-30 22:27:19.931219', 'step': 12427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:19.966116', 'step': 12427, 'epoch': 2} {'type': 'loss', 'content': 0.04108334332704544, 'timestamp': '2025-09-30 22:27:19.990125', 'step': 12428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:20.021141', 'step': 12428, 'epoch': 2} {'type': 'loss', 'content': 0.21637924015522003, 'timestamp': '2025-09-30 22:27:20.028083', 'step': 12429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:20.073644', 'step': 12429, 'epoch': 2} {'type': 'loss', 'content': 0.11231774091720581, 'timestamp': '2025-09-30 22:27:20.080072', 'step': 12430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:20.113815', 'step': 12430, 'epoch': 2} {'type': 'loss', 'content': 0.09068560600280762, 'timestamp': '2025-09-30 22:27:20.119705', 'step': 12431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:20.150687', 'step': 12431, 'epoch': 2} {'type': 'loss', 'content': 0.03652731329202652, 'timestamp': '2025-09-30 22:27:20.177740', 'step': 12432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:20.212889', 'step': 12432, 'epoch': 2} {'type': 'loss', 'content': 0.08737991005182266, 'timestamp': '2025-09-30 22:27:20.215758', 'step': 12433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:20.247446', 'step': 12433, 'epoch': 2} {'type': 'loss', 'content': 0.15449324250221252, 'timestamp': '2025-09-30 22:27:20.253365', 'step': 12434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:20.287595', 'step': 12434, 'epoch': 2} {'type': 'loss', 'content': 0.061417143791913986, 'timestamp': '2025-09-30 22:27:20.289886', 'step': 12435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:20.320676', 'step': 12435, 'epoch': 2} {'type': 'loss', 'content': 0.0990917757153511, 'timestamp': '2025-09-30 22:27:20.345972', 'step': 12436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:20.388123', 'step': 12436, 'epoch': 2} {'type': 'loss', 'content': 0.09687750786542892, 'timestamp': '2025-09-30 22:27:20.391238', 'step': 12437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:20.422689', 'step': 12437, 'epoch': 2} {'type': 'loss', 'content': 0.07302765548229218, 'timestamp': '2025-09-30 22:27:20.426500', 'step': 12438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:20.458392', 'step': 12438, 'epoch': 2} {'type': 'loss', 'content': 0.2503642141819, 'timestamp': '2025-09-30 22:27:20.463899', 'step': 12439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:20.498694', 'step': 12439, 'epoch': 2} {'type': 'loss', 'content': 0.10081664472818375, 'timestamp': '2025-09-30 22:27:20.523040', 'step': 12440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:20.566172', 'step': 12440, 'epoch': 2} {'type': 'loss', 'content': 0.11475971341133118, 'timestamp': '2025-09-30 22:27:20.568649', 'step': 12441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:20.600698', 'step': 12441, 'epoch': 2} {'type': 'loss', 'content': 0.0704568475484848, 'timestamp': '2025-09-30 22:27:20.603750', 'step': 12442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:20.634895', 'step': 12442, 'epoch': 2} {'type': 'loss', 'content': 0.07787109166383743, 'timestamp': '2025-09-30 22:27:20.639432', 'step': 12443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:20.674592', 'step': 12443, 'epoch': 2} {'type': 'loss', 'content': 0.1524408459663391, 'timestamp': '2025-09-30 22:27:20.701926', 'step': 12444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:20.732943', 'step': 12444, 'epoch': 2} {'type': 'loss', 'content': 0.12337424606084824, 'timestamp': '2025-09-30 22:27:20.736973', 'step': 12445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:20.773954', 'step': 12445, 'epoch': 2} {'type': 'loss', 'content': 0.11597485840320587, 'timestamp': '2025-09-30 22:27:20.777950', 'step': 12446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:20.810023', 'step': 12446, 'epoch': 2} {'type': 'loss', 'content': 0.1212504655122757, 'timestamp': '2025-09-30 22:27:20.814135', 'step': 12447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:20.845945', 'step': 12447, 'epoch': 2} {'type': 'loss', 'content': 0.10669457167387009, 'timestamp': '2025-09-30 22:27:20.870115', 'step': 12448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:20.907400', 'step': 12448, 'epoch': 2} {'type': 'loss', 'content': 0.054511282593011856, 'timestamp': '2025-09-30 22:27:20.910350', 'step': 12449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:20.947536', 'step': 12449, 'epoch': 2} {'type': 'loss', 'content': 0.056696292012929916, 'timestamp': '2025-09-30 22:27:20.958137', 'step': 12450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:20.989639', 'step': 12450, 'epoch': 2} {'type': 'loss', 'content': 0.1746303290128708, 'timestamp': '2025-09-30 22:27:20.998443', 'step': 12451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:21.030254', 'step': 12451, 'epoch': 2} {'type': 'loss', 'content': 0.16750162839889526, 'timestamp': '2025-09-30 22:27:21.054768', 'step': 12452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:21.089445', 'step': 12452, 'epoch': 2} {'type': 'loss', 'content': 0.09148237854242325, 'timestamp': '2025-09-30 22:27:21.097747', 'step': 12453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.129636', 'step': 12453, 'epoch': 2} {'type': 'loss', 'content': 0.21402786672115326, 'timestamp': '2025-09-30 22:27:21.133604', 'step': 12454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.169330', 'step': 12454, 'epoch': 2} {'type': 'loss', 'content': 0.11894330382347107, 'timestamp': '2025-09-30 22:27:21.172402', 'step': 12455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.208511', 'step': 12455, 'epoch': 2} {'type': 'loss', 'content': 0.12145798653364182, 'timestamp': '2025-09-30 22:27:21.233059', 'step': 12456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:21.270576', 'step': 12456, 'epoch': 2} {'type': 'loss', 'content': 0.07948287576436996, 'timestamp': '2025-09-30 22:27:21.276758', 'step': 12457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:21.312987', 'step': 12457, 'epoch': 2} {'type': 'loss', 'content': 0.09706135094165802, 'timestamp': '2025-09-30 22:27:21.316063', 'step': 12458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:21.347555', 'step': 12458, 'epoch': 2} {'type': 'loss', 'content': 0.17785422503948212, 'timestamp': '2025-09-30 22:27:21.354725', 'step': 12459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:27:21.388098', 'step': 12459, 'epoch': 2} {'type': 'loss', 'content': 0.08385145664215088, 'timestamp': '2025-09-30 22:27:21.416546', 'step': 12460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:21.448553', 'step': 12460, 'epoch': 2} {'type': 'loss', 'content': 0.10517574846744537, 'timestamp': '2025-09-30 22:27:21.451844', 'step': 12461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.488032', 'step': 12461, 'epoch': 2} {'type': 'loss', 'content': 0.09725290536880493, 'timestamp': '2025-09-30 22:27:21.495565', 'step': 12462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.539041', 'step': 12462, 'epoch': 2} {'type': 'loss', 'content': 0.1375548541545868, 'timestamp': '2025-09-30 22:27:21.542127', 'step': 12463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.576164', 'step': 12463, 'epoch': 2} {'type': 'loss', 'content': 0.07632917910814285, 'timestamp': '2025-09-30 22:27:21.600760', 'step': 12464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.636309', 'step': 12464, 'epoch': 2} {'type': 'loss', 'content': 0.059012990444898605, 'timestamp': '2025-09-30 22:27:21.643629', 'step': 12465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.674357', 'step': 12465, 'epoch': 2} {'type': 'loss', 'content': 0.09753413498401642, 'timestamp': '2025-09-30 22:27:21.676883', 'step': 12466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.708873', 'step': 12466, 'epoch': 2} {'type': 'loss', 'content': 0.07198599725961685, 'timestamp': '2025-09-30 22:27:21.713022', 'step': 12467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.747988', 'step': 12467, 'epoch': 2} {'type': 'loss', 'content': 0.08531917631626129, 'timestamp': '2025-09-30 22:27:21.772774', 'step': 12468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:21.802765', 'step': 12468, 'epoch': 2} {'type': 'loss', 'content': 0.0980156660079956, 'timestamp': '2025-09-30 22:27:21.809014', 'step': 12469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.842136', 'step': 12469, 'epoch': 2} {'type': 'loss', 'content': 0.09232679009437561, 'timestamp': '2025-09-30 22:27:21.847797', 'step': 12470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.881761', 'step': 12470, 'epoch': 2} {'type': 'loss', 'content': 0.12568022310733795, 'timestamp': '2025-09-30 22:27:21.884176', 'step': 12471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:21.919658', 'step': 12471, 'epoch': 2} {'type': 'loss', 'content': 0.07206695526838303, 'timestamp': '2025-09-30 22:27:21.944515', 'step': 12472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:21.980672', 'step': 12472, 'epoch': 2} {'type': 'loss', 'content': 0.14594542980194092, 'timestamp': '2025-09-30 22:27:21.987475', 'step': 12473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.022530', 'step': 12473, 'epoch': 2} {'type': 'loss', 'content': 0.09046456217765808, 'timestamp': '2025-09-30 22:27:22.026195', 'step': 12474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.058549', 'step': 12474, 'epoch': 2} {'type': 'loss', 'content': 0.055707287043333054, 'timestamp': '2025-09-30 22:27:22.062793', 'step': 12475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.095165', 'step': 12475, 'epoch': 2} {'type': 'loss', 'content': 0.08347198367118835, 'timestamp': '2025-09-30 22:27:22.126176', 'step': 12476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.169008', 'step': 12476, 'epoch': 2} {'type': 'loss', 'content': 0.14790819585323334, 'timestamp': '2025-09-30 22:27:22.174906', 'step': 12477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.209293', 'step': 12477, 'epoch': 2} {'type': 'loss', 'content': 0.12051232904195786, 'timestamp': '2025-09-30 22:27:22.214297', 'step': 12478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.245897', 'step': 12478, 'epoch': 2} {'type': 'loss', 'content': 0.1067187562584877, 'timestamp': '2025-09-30 22:27:22.250132', 'step': 12479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.285521', 'step': 12479, 'epoch': 2} {'type': 'loss', 'content': 0.10593680292367935, 'timestamp': '2025-09-30 22:27:22.313241', 'step': 12480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.344324', 'step': 12480, 'epoch': 2} {'type': 'loss', 'content': 0.030648939311504364, 'timestamp': '2025-09-30 22:27:22.347264', 'step': 12481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.386848', 'step': 12481, 'epoch': 2} {'type': 'loss', 'content': 0.09943249821662903, 'timestamp': '2025-09-30 22:27:22.389245', 'step': 12482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.424547', 'step': 12482, 'epoch': 2} {'type': 'loss', 'content': 0.08124515414237976, 'timestamp': '2025-09-30 22:27:22.427282', 'step': 12483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.459393', 'step': 12483, 'epoch': 2} {'type': 'loss', 'content': 0.1030946895480156, 'timestamp': '2025-09-30 22:27:22.486888', 'step': 12484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:22.518380', 'step': 12484, 'epoch': 2} {'type': 'loss', 'content': 0.13133110105991364, 'timestamp': '2025-09-30 22:27:22.521373', 'step': 12485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:22.565120', 'step': 12485, 'epoch': 2} {'type': 'loss', 'content': 0.09761586040258408, 'timestamp': '2025-09-30 22:27:22.568399', 'step': 12486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.601776', 'step': 12486, 'epoch': 2} {'type': 'loss', 'content': 0.16185756027698517, 'timestamp': '2025-09-30 22:27:22.609694', 'step': 12487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.647508', 'step': 12487, 'epoch': 2} {'type': 'loss', 'content': 0.11608779430389404, 'timestamp': '2025-09-30 22:27:22.677314', 'step': 12488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.709380', 'step': 12488, 'epoch': 2} {'type': 'loss', 'content': 0.12083883583545685, 'timestamp': '2025-09-30 22:27:22.713418', 'step': 12489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.745576', 'step': 12489, 'epoch': 2} {'type': 'loss', 'content': 0.08266113698482513, 'timestamp': '2025-09-30 22:27:22.749528', 'step': 12490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.790537', 'step': 12490, 'epoch': 2} {'type': 'loss', 'content': 0.09029217809438705, 'timestamp': '2025-09-30 22:27:22.793687', 'step': 12491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.825175', 'step': 12491, 'epoch': 2} {'type': 'loss', 'content': 0.1008835881948471, 'timestamp': '2025-09-30 22:27:22.849984', 'step': 12492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:22.886169', 'step': 12492, 'epoch': 2} {'type': 'loss', 'content': 0.1143648624420166, 'timestamp': '2025-09-30 22:27:22.888464', 'step': 12493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:22.920039', 'step': 12493, 'epoch': 2} {'type': 'loss', 'content': 0.10748721659183502, 'timestamp': '2025-09-30 22:27:22.922470', 'step': 12494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.958898', 'step': 12494, 'epoch': 2} {'type': 'loss', 'content': 0.07476840913295746, 'timestamp': '2025-09-30 22:27:22.961622', 'step': 12495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:22.994038', 'step': 12495, 'epoch': 2} {'type': 'loss', 'content': 0.15455253422260284, 'timestamp': '2025-09-30 22:27:23.018783', 'step': 12496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:23.049772', 'step': 12496, 'epoch': 2} {'type': 'loss', 'content': 0.1276387721300125, 'timestamp': '2025-09-30 22:27:23.058586', 'step': 12497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:23.094024', 'step': 12497, 'epoch': 2} {'type': 'loss', 'content': 0.12691840529441833, 'timestamp': '2025-09-30 22:27:23.099994', 'step': 12498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:23.137534', 'step': 12498, 'epoch': 2} {'type': 'loss', 'content': 0.14489811658859253, 'timestamp': '2025-09-30 22:27:23.144431', 'step': 12499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:23.182461', 'step': 12499, 'epoch': 2} {'type': 'loss', 'content': 0.1522267460823059, 'timestamp': '2025-09-30 22:27:23.206888', 'step': 12500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 12500', 'timestamp': '2025-09-30 22:27:28.314933', 'step': 12500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:28.360287', 'step': 12500, 'epoch': 2} {'type': 'loss', 'content': 0.08278502523899078, 'timestamp': '2025-09-30 22:27:28.362678', 'step': 12501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:28.393321', 'step': 12501, 'epoch': 2} {'type': 'loss', 'content': 0.11968047171831131, 'timestamp': '2025-09-30 22:27:28.395711', 'step': 12502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:28.428185', 'step': 12502, 'epoch': 2} {'type': 'loss', 'content': 0.12449926882982254, 'timestamp': '2025-09-30 22:27:28.430432', 'step': 12503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:28.462887', 'step': 12503, 'epoch': 2} {'type': 'loss', 'content': 0.1352875679731369, 'timestamp': '2025-09-30 22:27:28.486888', 'step': 12504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:28.516673', 'step': 12504, 'epoch': 2} {'type': 'loss', 'content': 0.10107776522636414, 'timestamp': '2025-09-30 22:27:28.518915', 'step': 12505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:28.555172', 'step': 12505, 'epoch': 2} {'type': 'loss', 'content': 0.09430047124624252, 'timestamp': '2025-09-30 22:27:28.557924', 'step': 12506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:28.589739', 'step': 12506, 'epoch': 2} {'type': 'loss', 'content': 0.16976584494113922, 'timestamp': '2025-09-30 22:27:28.592023', 'step': 12507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:28.621888', 'step': 12507, 'epoch': 2} {'type': 'loss', 'content': 0.07387836277484894, 'timestamp': '2025-09-30 22:27:28.647239', 'step': 12508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:27:28.679332', 'step': 12508, 'epoch': 2} {'type': 'loss', 'content': 0.12947073578834534, 'timestamp': '2025-09-30 22:27:28.681509', 'step': 12509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:28.712062', 'step': 12509, 'epoch': 2} {'type': 'loss', 'content': 0.11104903370141983, 'timestamp': '2025-09-30 22:27:28.723977', 'step': 12510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:28.754507', 'step': 12510, 'epoch': 2} {'type': 'loss', 'content': 0.1290317177772522, 'timestamp': '2025-09-30 22:27:28.757374', 'step': 12511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:28.788469', 'step': 12511, 'epoch': 2} {'type': 'loss', 'content': 0.13994039595127106, 'timestamp': '2025-09-30 22:27:28.813772', 'step': 12512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:28.844172', 'step': 12512, 'epoch': 2} {'type': 'loss', 'content': 0.0935422033071518, 'timestamp': '2025-09-30 22:27:28.846681', 'step': 12513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:28.877326', 'step': 12513, 'epoch': 2} {'type': 'loss', 'content': 0.06984591484069824, 'timestamp': '2025-09-30 22:27:28.879720', 'step': 12514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:28.909693', 'step': 12514, 'epoch': 2} {'type': 'loss', 'content': 0.120243601500988, 'timestamp': '2025-09-30 22:27:28.912767', 'step': 12515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:28.944832', 'step': 12515, 'epoch': 2} {'type': 'loss', 'content': 0.05749063938856125, 'timestamp': '2025-09-30 22:27:28.970166', 'step': 12516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:29.000860', 'step': 12516, 'epoch': 2} {'type': 'loss', 'content': 0.21017533540725708, 'timestamp': '2025-09-30 22:27:29.003529', 'step': 12517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.038899', 'step': 12517, 'epoch': 2} {'type': 'loss', 'content': 0.056912072002887726, 'timestamp': '2025-09-30 22:27:29.041411', 'step': 12518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:29.072524', 'step': 12518, 'epoch': 2} {'type': 'loss', 'content': 0.08628340810537338, 'timestamp': '2025-09-30 22:27:29.075279', 'step': 12519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:29.105721', 'step': 12519, 'epoch': 2} {'type': 'loss', 'content': 0.08511361479759216, 'timestamp': '2025-09-30 22:27:29.129867', 'step': 12520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.162200', 'step': 12520, 'epoch': 2} {'type': 'loss', 'content': 0.06644701957702637, 'timestamp': '2025-09-30 22:27:29.165016', 'step': 12521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.195416', 'step': 12521, 'epoch': 2} {'type': 'loss', 'content': 0.10406797379255295, 'timestamp': '2025-09-30 22:27:29.198124', 'step': 12522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.228691', 'step': 12522, 'epoch': 2} {'type': 'loss', 'content': 0.05458059534430504, 'timestamp': '2025-09-30 22:27:29.235301', 'step': 12523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.275669', 'step': 12523, 'epoch': 2} {'type': 'loss', 'content': 0.1444423794746399, 'timestamp': '2025-09-30 22:27:29.299281', 'step': 12524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.328892', 'step': 12524, 'epoch': 2} {'type': 'loss', 'content': 0.10237288475036621, 'timestamp': '2025-09-30 22:27:29.331218', 'step': 12525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.365323', 'step': 12525, 'epoch': 2} {'type': 'loss', 'content': 0.170571967959404, 'timestamp': '2025-09-30 22:27:29.369685', 'step': 12526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.401877', 'step': 12526, 'epoch': 2} {'type': 'loss', 'content': 0.12063632160425186, 'timestamp': '2025-09-30 22:27:29.404451', 'step': 12527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.437509', 'step': 12527, 'epoch': 2} {'type': 'loss', 'content': 0.0619070939719677, 'timestamp': '2025-09-30 22:27:29.462544', 'step': 12528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:29.494583', 'step': 12528, 'epoch': 2} {'type': 'loss', 'content': 0.10590694844722748, 'timestamp': '2025-09-30 22:27:29.500000', 'step': 12529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.532170', 'step': 12529, 'epoch': 2} {'type': 'loss', 'content': 0.1548137068748474, 'timestamp': '2025-09-30 22:27:29.536151', 'step': 12530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.568046', 'step': 12530, 'epoch': 2} {'type': 'loss', 'content': 0.10210033506155014, 'timestamp': '2025-09-30 22:27:29.571216', 'step': 12531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.602164', 'step': 12531, 'epoch': 2} {'type': 'loss', 'content': 0.01671708934009075, 'timestamp': '2025-09-30 22:27:29.629872', 'step': 12532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.661986', 'step': 12532, 'epoch': 2} {'type': 'loss', 'content': 0.06504949182271957, 'timestamp': '2025-09-30 22:27:29.664610', 'step': 12533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:29.701649', 'step': 12533, 'epoch': 2} {'type': 'loss', 'content': 0.12152670323848724, 'timestamp': '2025-09-30 22:27:29.705868', 'step': 12534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.737189', 'step': 12534, 'epoch': 2} {'type': 'loss', 'content': 0.08372775465250015, 'timestamp': '2025-09-30 22:27:29.739493', 'step': 12535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.770619', 'step': 12535, 'epoch': 2} {'type': 'loss', 'content': 0.1251489222049713, 'timestamp': '2025-09-30 22:27:29.796180', 'step': 12536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:29.826801', 'step': 12536, 'epoch': 2} {'type': 'loss', 'content': 0.05364407226443291, 'timestamp': '2025-09-30 22:27:29.830875', 'step': 12537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.862618', 'step': 12537, 'epoch': 2} {'type': 'loss', 'content': 0.1179639995098114, 'timestamp': '2025-09-30 22:27:29.866849', 'step': 12538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:29.898884', 'step': 12538, 'epoch': 2} {'type': 'loss', 'content': 0.12432943284511566, 'timestamp': '2025-09-30 22:27:29.909591', 'step': 12539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:29.945557', 'step': 12539, 'epoch': 2} {'type': 'loss', 'content': 0.055577490478754044, 'timestamp': '2025-09-30 22:27:29.970891', 'step': 12540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:30.007106', 'step': 12540, 'epoch': 2} {'type': 'loss', 'content': 0.1289595514535904, 'timestamp': '2025-09-30 22:27:30.009830', 'step': 12541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.043596', 'step': 12541, 'epoch': 2} {'type': 'loss', 'content': 0.22313150763511658, 'timestamp': '2025-09-30 22:27:30.050689', 'step': 12542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:30.084745', 'step': 12542, 'epoch': 2} {'type': 'loss', 'content': 0.1111091747879982, 'timestamp': '2025-09-30 22:27:30.087722', 'step': 12543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:30.118444', 'step': 12543, 'epoch': 2} {'type': 'loss', 'content': 0.04508950561285019, 'timestamp': '2025-09-30 22:27:30.146534', 'step': 12544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:30.184572', 'step': 12544, 'epoch': 2} {'type': 'loss', 'content': 0.09307849407196045, 'timestamp': '2025-09-30 22:27:30.187375', 'step': 12545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.225410', 'step': 12545, 'epoch': 2} {'type': 'loss', 'content': 0.07946247607469559, 'timestamp': '2025-09-30 22:27:30.228997', 'step': 12546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:30.265936', 'step': 12546, 'epoch': 2} {'type': 'loss', 'content': 0.18943874537944794, 'timestamp': '2025-09-30 22:27:30.270771', 'step': 12547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.304671', 'step': 12547, 'epoch': 2} {'type': 'loss', 'content': 0.08377565443515778, 'timestamp': '2025-09-30 22:27:30.328849', 'step': 12548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:30.366865', 'step': 12548, 'epoch': 2} {'type': 'loss', 'content': 0.06697379052639008, 'timestamp': '2025-09-30 22:27:30.371645', 'step': 12549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:30.403913', 'step': 12549, 'epoch': 2} {'type': 'loss', 'content': 0.1394454836845398, 'timestamp': '2025-09-30 22:27:30.414144', 'step': 12550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:30.451165', 'step': 12550, 'epoch': 2} {'type': 'loss', 'content': 0.1833379566669464, 'timestamp': '2025-09-30 22:27:30.454476', 'step': 12551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:30.490381', 'step': 12551, 'epoch': 2} {'type': 'loss', 'content': 0.18118928372859955, 'timestamp': '2025-09-30 22:27:30.515919', 'step': 12552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.557559', 'step': 12552, 'epoch': 2} {'type': 'loss', 'content': 0.053957533091306686, 'timestamp': '2025-09-30 22:27:30.561405', 'step': 12553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.599746', 'step': 12553, 'epoch': 2} {'type': 'loss', 'content': 0.11982849985361099, 'timestamp': '2025-09-30 22:27:30.609671', 'step': 12554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.652270', 'step': 12554, 'epoch': 2} {'type': 'loss', 'content': 0.16588300466537476, 'timestamp': '2025-09-30 22:27:30.663397', 'step': 12555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.708679', 'step': 12555, 'epoch': 2} {'type': 'loss', 'content': 0.18591123819351196, 'timestamp': '2025-09-30 22:27:30.732943', 'step': 12556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.764267', 'step': 12556, 'epoch': 2} {'type': 'loss', 'content': 0.1037607416510582, 'timestamp': '2025-09-30 22:27:30.768451', 'step': 12557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:30.801984', 'step': 12557, 'epoch': 2} {'type': 'loss', 'content': 0.08391167223453522, 'timestamp': '2025-09-30 22:27:30.805648', 'step': 12558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.837518', 'step': 12558, 'epoch': 2} {'type': 'loss', 'content': 0.16615968942642212, 'timestamp': '2025-09-30 22:27:30.841560', 'step': 12559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:30.873703', 'step': 12559, 'epoch': 2} {'type': 'loss', 'content': 0.12348227947950363, 'timestamp': '2025-09-30 22:27:30.899867', 'step': 12560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:30.933296', 'step': 12560, 'epoch': 2} {'type': 'loss', 'content': 0.06605423241853714, 'timestamp': '2025-09-30 22:27:30.940909', 'step': 12561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:30.980754', 'step': 12561, 'epoch': 2} {'type': 'loss', 'content': 0.1386721283197403, 'timestamp': '2025-09-30 22:27:30.985951', 'step': 12562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.024504', 'step': 12562, 'epoch': 2} {'type': 'loss', 'content': 0.2015225887298584, 'timestamp': '2025-09-30 22:27:31.033561', 'step': 12563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.071922', 'step': 12563, 'epoch': 2} {'type': 'loss', 'content': 0.15600647032260895, 'timestamp': '2025-09-30 22:27:31.096200', 'step': 12564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:31.135083', 'step': 12564, 'epoch': 2} {'type': 'loss', 'content': 0.1586892008781433, 'timestamp': '2025-09-30 22:27:31.147725', 'step': 12565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.188241', 'step': 12565, 'epoch': 2} {'type': 'loss', 'content': 0.0732315182685852, 'timestamp': '2025-09-30 22:27:31.196705', 'step': 12566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:31.236628', 'step': 12566, 'epoch': 2} {'type': 'loss', 'content': 0.10903401672840118, 'timestamp': '2025-09-30 22:27:31.247343', 'step': 12567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:31.286955', 'step': 12567, 'epoch': 2} {'type': 'loss', 'content': 0.10191939026117325, 'timestamp': '2025-09-30 22:27:31.312146', 'step': 12568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.343864', 'step': 12568, 'epoch': 2} {'type': 'loss', 'content': 0.21021926403045654, 'timestamp': '2025-09-30 22:27:31.346935', 'step': 12569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:31.378506', 'step': 12569, 'epoch': 2} {'type': 'loss', 'content': 0.21225953102111816, 'timestamp': '2025-09-30 22:27:31.381289', 'step': 12570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.412354', 'step': 12570, 'epoch': 2} {'type': 'loss', 'content': 0.1123652458190918, 'timestamp': '2025-09-30 22:27:31.415426', 'step': 12571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.447494', 'step': 12571, 'epoch': 2} {'type': 'loss', 'content': 0.08620104938745499, 'timestamp': '2025-09-30 22:27:31.471903', 'step': 12572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:31.510744', 'step': 12572, 'epoch': 2} {'type': 'loss', 'content': 0.1265978068113327, 'timestamp': '2025-09-30 22:27:31.514175', 'step': 12573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:31.545408', 'step': 12573, 'epoch': 2} {'type': 'loss', 'content': 0.14886121451854706, 'timestamp': '2025-09-30 22:27:31.556236', 'step': 12574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:31.596512', 'step': 12574, 'epoch': 2} {'type': 'loss', 'content': 0.09824784100055695, 'timestamp': '2025-09-30 22:27:31.599577', 'step': 12575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:31.631564', 'step': 12575, 'epoch': 2} {'type': 'loss', 'content': 0.1125742644071579, 'timestamp': '2025-09-30 22:27:31.657261', 'step': 12576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:31.688678', 'step': 12576, 'epoch': 2} {'type': 'loss', 'content': 0.11840914189815521, 'timestamp': '2025-09-30 22:27:31.692237', 'step': 12577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:31.732501', 'step': 12577, 'epoch': 2} {'type': 'loss', 'content': 0.1537708193063736, 'timestamp': '2025-09-30 22:27:31.736061', 'step': 12578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:31.767774', 'step': 12578, 'epoch': 2} {'type': 'loss', 'content': 0.10327261686325073, 'timestamp': '2025-09-30 22:27:31.780788', 'step': 12579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:31.813175', 'step': 12579, 'epoch': 2} {'type': 'loss', 'content': 0.16577468812465668, 'timestamp': '2025-09-30 22:27:31.838813', 'step': 12580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:31.880648', 'step': 12580, 'epoch': 2} {'type': 'loss', 'content': 0.12034524977207184, 'timestamp': '2025-09-30 22:27:31.884607', 'step': 12581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:31.915107', 'step': 12581, 'epoch': 2} {'type': 'loss', 'content': 0.12024199217557907, 'timestamp': '2025-09-30 22:27:31.917297', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:27:39.822525', 'step': 12582, 'epoch': 2} {'type': 'pplx', 'content': 12943.63622050349, 'timestamp': '2025-09-30 22:27:39.826359', 'step': 12582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:39.863759', 'step': 12582, 'epoch': 2} {'type': 'loss', 'content': 0.07169398665428162, 'timestamp': '2025-09-30 22:27:39.872836', 'step': 12583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:39.904857', 'step': 12583, 'epoch': 2} {'type': 'loss', 'content': 0.1039624959230423, 'timestamp': '2025-09-30 22:27:39.930182', 'step': 12584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:39.966791', 'step': 12584, 'epoch': 2} {'type': 'loss', 'content': 0.07870294153690338, 'timestamp': '2025-09-30 22:27:39.970211', 'step': 12585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:40.001165', 'step': 12585, 'epoch': 2} {'type': 'loss', 'content': 0.10520850121974945, 'timestamp': '2025-09-30 22:27:40.006317', 'step': 12586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.043524', 'step': 12586, 'epoch': 2} {'type': 'loss', 'content': 0.0661974623799324, 'timestamp': '2025-09-30 22:27:40.045930', 'step': 12587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.076544', 'step': 12587, 'epoch': 2} {'type': 'loss', 'content': 0.1386609673500061, 'timestamp': '2025-09-30 22:27:40.100708', 'step': 12588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.135175', 'step': 12588, 'epoch': 2} {'type': 'loss', 'content': 0.09590935707092285, 'timestamp': '2025-09-30 22:27:40.139516', 'step': 12589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:40.169843', 'step': 12589, 'epoch': 2} {'type': 'loss', 'content': 0.11930160969495773, 'timestamp': '2025-09-30 22:27:40.172494', 'step': 12590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.202984', 'step': 12590, 'epoch': 2} {'type': 'loss', 'content': 0.048897139728069305, 'timestamp': '2025-09-30 22:27:40.209203', 'step': 12591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.239419', 'step': 12591, 'epoch': 2} {'type': 'loss', 'content': 0.05885585770010948, 'timestamp': '2025-09-30 22:27:40.263743', 'step': 12592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.293378', 'step': 12592, 'epoch': 2} {'type': 'loss', 'content': 0.12006006389856339, 'timestamp': '2025-09-30 22:27:40.296412', 'step': 12593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:40.327272', 'step': 12593, 'epoch': 2} {'type': 'loss', 'content': 0.11984840780496597, 'timestamp': '2025-09-30 22:27:40.330562', 'step': 12594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:40.361819', 'step': 12594, 'epoch': 2} {'type': 'loss', 'content': 0.08675310015678406, 'timestamp': '2025-09-30 22:27:40.364835', 'step': 12595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.396054', 'step': 12595, 'epoch': 2} {'type': 'loss', 'content': 0.13395065069198608, 'timestamp': '2025-09-30 22:27:40.420042', 'step': 12596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.451349', 'step': 12596, 'epoch': 2} {'type': 'loss', 'content': 0.08356110751628876, 'timestamp': '2025-09-30 22:27:40.454001', 'step': 12597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.484434', 'step': 12597, 'epoch': 2} {'type': 'loss', 'content': 0.12910787761211395, 'timestamp': '2025-09-30 22:27:40.488435', 'step': 12598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.521197', 'step': 12598, 'epoch': 2} {'type': 'loss', 'content': 0.2214948832988739, 'timestamp': '2025-09-30 22:27:40.529341', 'step': 12599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:40.561761', 'step': 12599, 'epoch': 2} {'type': 'loss', 'content': 0.12231095880270004, 'timestamp': '2025-09-30 22:27:40.586616', 'step': 12600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.619162', 'step': 12600, 'epoch': 2} {'type': 'loss', 'content': 0.09994450956583023, 'timestamp': '2025-09-30 22:27:40.622771', 'step': 12601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:40.655835', 'step': 12601, 'epoch': 2} {'type': 'loss', 'content': 0.11019545048475266, 'timestamp': '2025-09-30 22:27:40.658507', 'step': 12602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:40.689882', 'step': 12602, 'epoch': 2} {'type': 'loss', 'content': 0.04953889176249504, 'timestamp': '2025-09-30 22:27:40.693275', 'step': 12603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.724439', 'step': 12603, 'epoch': 2} {'type': 'loss', 'content': 0.09542084485292435, 'timestamp': '2025-09-30 22:27:40.749437', 'step': 12604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.780611', 'step': 12604, 'epoch': 2} {'type': 'loss', 'content': 0.06231687590479851, 'timestamp': '2025-09-30 22:27:40.783950', 'step': 12605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:40.820705', 'step': 12605, 'epoch': 2} {'type': 'loss', 'content': 0.09386193752288818, 'timestamp': '2025-09-30 22:27:40.828539', 'step': 12606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.862154', 'step': 12606, 'epoch': 2} {'type': 'loss', 'content': 0.08790207654237747, 'timestamp': '2025-09-30 22:27:40.864749', 'step': 12607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.894847', 'step': 12607, 'epoch': 2} {'type': 'loss', 'content': 0.1417107880115509, 'timestamp': '2025-09-30 22:27:40.926123', 'step': 12608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:40.959875', 'step': 12608, 'epoch': 2} {'type': 'loss', 'content': 0.014056079089641571, 'timestamp': '2025-09-30 22:27:40.964794', 'step': 12609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:40.996891', 'step': 12609, 'epoch': 2} {'type': 'loss', 'content': 0.13553255796432495, 'timestamp': '2025-09-30 22:27:40.999306', 'step': 12610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:41.031322', 'step': 12610, 'epoch': 2} {'type': 'loss', 'content': 0.05227944254875183, 'timestamp': '2025-09-30 22:27:41.033630', 'step': 12611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:41.066101', 'step': 12611, 'epoch': 2} {'type': 'loss', 'content': 0.09572344273328781, 'timestamp': '2025-09-30 22:27:41.091065', 'step': 12612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:41.122298', 'step': 12612, 'epoch': 2} {'type': 'loss', 'content': 0.11998773366212845, 'timestamp': '2025-09-30 22:27:41.125762', 'step': 12613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:41.163275', 'step': 12613, 'epoch': 2} {'type': 'loss', 'content': 0.07431536167860031, 'timestamp': '2025-09-30 22:27:41.167008', 'step': 12614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:41.198016', 'step': 12614, 'epoch': 2} {'type': 'loss', 'content': 0.09309330582618713, 'timestamp': '2025-09-30 22:27:41.200125', 'step': 12615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:41.237440', 'step': 12615, 'epoch': 2} {'type': 'loss', 'content': 0.08578332513570786, 'timestamp': '2025-09-30 22:27:41.261684', 'step': 12616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.294787', 'step': 12616, 'epoch': 2} {'type': 'loss', 'content': 0.09347988665103912, 'timestamp': '2025-09-30 22:27:41.298470', 'step': 12617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.333654', 'step': 12617, 'epoch': 2} {'type': 'loss', 'content': 0.1174122542142868, 'timestamp': '2025-09-30 22:27:41.338336', 'step': 12618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.368909', 'step': 12618, 'epoch': 2} {'type': 'loss', 'content': 0.12227831035852432, 'timestamp': '2025-09-30 22:27:41.371844', 'step': 12619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.402411', 'step': 12619, 'epoch': 2} {'type': 'loss', 'content': 0.11860878765583038, 'timestamp': '2025-09-30 22:27:41.427022', 'step': 12620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.457262', 'step': 12620, 'epoch': 2} {'type': 'loss', 'content': 0.045092176645994186, 'timestamp': '2025-09-30 22:27:41.459793', 'step': 12621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.490212', 'step': 12621, 'epoch': 2} {'type': 'loss', 'content': 0.08988211303949356, 'timestamp': '2025-09-30 22:27:41.492621', 'step': 12622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:41.524512', 'step': 12622, 'epoch': 2} {'type': 'loss', 'content': 0.1401776820421219, 'timestamp': '2025-09-30 22:27:41.529921', 'step': 12623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:41.564456', 'step': 12623, 'epoch': 2} {'type': 'loss', 'content': 0.14956144988536835, 'timestamp': '2025-09-30 22:27:41.588389', 'step': 12624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:41.618752', 'step': 12624, 'epoch': 2} {'type': 'loss', 'content': 0.1285393089056015, 'timestamp': '2025-09-30 22:27:41.622053', 'step': 12625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:41.653843', 'step': 12625, 'epoch': 2} {'type': 'loss', 'content': 0.09296514093875885, 'timestamp': '2025-09-30 22:27:41.656004', 'step': 12626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.686709', 'step': 12626, 'epoch': 2} {'type': 'loss', 'content': 0.18508198857307434, 'timestamp': '2025-09-30 22:27:41.691990', 'step': 12627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:41.723870', 'step': 12627, 'epoch': 2} {'type': 'loss', 'content': 0.05242812633514404, 'timestamp': '2025-09-30 22:27:41.748302', 'step': 12628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:41.782703', 'step': 12628, 'epoch': 2} {'type': 'loss', 'content': 0.17407311499118805, 'timestamp': '2025-09-30 22:27:41.786100', 'step': 12629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:41.817845', 'step': 12629, 'epoch': 2} {'type': 'loss', 'content': 0.10697678476572037, 'timestamp': '2025-09-30 22:27:41.820907', 'step': 12630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:41.851859', 'step': 12630, 'epoch': 2} {'type': 'loss', 'content': 0.17124775052070618, 'timestamp': '2025-09-30 22:27:41.857293', 'step': 12631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:41.892182', 'step': 12631, 'epoch': 2} {'type': 'loss', 'content': 0.15360519289970398, 'timestamp': '2025-09-30 22:27:41.917547', 'step': 12632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:41.946702', 'step': 12632, 'epoch': 2} {'type': 'loss', 'content': 0.04030696302652359, 'timestamp': '2025-09-30 22:27:41.948827', 'step': 12633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:41.978887', 'step': 12633, 'epoch': 2} {'type': 'loss', 'content': 0.10038401931524277, 'timestamp': '2025-09-30 22:27:41.987512', 'step': 12634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:42.024952', 'step': 12634, 'epoch': 2} {'type': 'loss', 'content': 0.13312041759490967, 'timestamp': '2025-09-30 22:27:42.028690', 'step': 12635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:42.059398', 'step': 12635, 'epoch': 2} {'type': 'loss', 'content': 0.0390964113175869, 'timestamp': '2025-09-30 22:27:42.084476', 'step': 12636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:27:42.115386', 'step': 12636, 'epoch': 2} {'type': 'loss', 'content': 0.09917395561933517, 'timestamp': '2025-09-30 22:27:42.118547', 'step': 12637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:42.149126', 'step': 12637, 'epoch': 2} {'type': 'loss', 'content': 0.036966465413570404, 'timestamp': '2025-09-30 22:27:42.151948', 'step': 12638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:42.187171', 'step': 12638, 'epoch': 2} {'type': 'loss', 'content': 0.1063053160905838, 'timestamp': '2025-09-30 22:27:42.191567', 'step': 12639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:42.223572', 'step': 12639, 'epoch': 2} {'type': 'loss', 'content': 0.10440276563167572, 'timestamp': '2025-09-30 22:27:42.248248', 'step': 12640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:42.279670', 'step': 12640, 'epoch': 2} {'type': 'loss', 'content': 0.10261945426464081, 'timestamp': '2025-09-30 22:27:42.282014', 'step': 12641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:42.313747', 'step': 12641, 'epoch': 2} {'type': 'loss', 'content': 0.11934682726860046, 'timestamp': '2025-09-30 22:27:42.317303', 'step': 12642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:42.348013', 'step': 12642, 'epoch': 2} {'type': 'loss', 'content': 0.10763449966907501, 'timestamp': '2025-09-30 22:27:42.351815', 'step': 12643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:42.382741', 'step': 12643, 'epoch': 2} {'type': 'loss', 'content': 0.13090214133262634, 'timestamp': '2025-09-30 22:27:42.407431', 'step': 12644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.444714', 'step': 12644, 'epoch': 2} {'type': 'loss', 'content': 0.13733817636966705, 'timestamp': '2025-09-30 22:27:42.448319', 'step': 12645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:42.480573', 'step': 12645, 'epoch': 2} {'type': 'loss', 'content': 0.15235799551010132, 'timestamp': '2025-09-30 22:27:42.484252', 'step': 12646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.515858', 'step': 12646, 'epoch': 2} {'type': 'loss', 'content': 0.0639142096042633, 'timestamp': '2025-09-30 22:27:42.518735', 'step': 12647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.548931', 'step': 12647, 'epoch': 2} {'type': 'loss', 'content': 0.15768250823020935, 'timestamp': '2025-09-30 22:27:42.573510', 'step': 12648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.605067', 'step': 12648, 'epoch': 2} {'type': 'loss', 'content': 0.12155218422412872, 'timestamp': '2025-09-30 22:27:42.608778', 'step': 12649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:42.640673', 'step': 12649, 'epoch': 2} {'type': 'loss', 'content': 0.09470286220312119, 'timestamp': '2025-09-30 22:27:42.643342', 'step': 12650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.674757', 'step': 12650, 'epoch': 2} {'type': 'loss', 'content': 0.11392911523580551, 'timestamp': '2025-09-30 22:27:42.677362', 'step': 12651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:42.709066', 'step': 12651, 'epoch': 2} {'type': 'loss', 'content': 0.11597626656293869, 'timestamp': '2025-09-30 22:27:42.733637', 'step': 12652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:42.764966', 'step': 12652, 'epoch': 2} {'type': 'loss', 'content': 0.061921846121549606, 'timestamp': '2025-09-30 22:27:42.771398', 'step': 12653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.817626', 'step': 12653, 'epoch': 2} {'type': 'loss', 'content': 0.08489898592233658, 'timestamp': '2025-09-30 22:27:42.822246', 'step': 12654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:42.855582', 'step': 12654, 'epoch': 2} {'type': 'loss', 'content': 0.06608045846223831, 'timestamp': '2025-09-30 22:27:42.858370', 'step': 12655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:42.908084', 'step': 12655, 'epoch': 2} {'type': 'loss', 'content': 0.0923440158367157, 'timestamp': '2025-09-30 22:27:42.933494', 'step': 12656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:42.966719', 'step': 12656, 'epoch': 2} {'type': 'loss', 'content': 0.20535016059875488, 'timestamp': '2025-09-30 22:27:42.971808', 'step': 12657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:43.010696', 'step': 12657, 'epoch': 2} {'type': 'loss', 'content': 0.15524667501449585, 'timestamp': '2025-09-30 22:27:43.016032', 'step': 12658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:43.054095', 'step': 12658, 'epoch': 2} {'type': 'loss', 'content': 0.08979518711566925, 'timestamp': '2025-09-30 22:27:43.057265', 'step': 12659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.088758', 'step': 12659, 'epoch': 2} {'type': 'loss', 'content': 0.17031608521938324, 'timestamp': '2025-09-30 22:27:43.115176', 'step': 12660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:43.146455', 'step': 12660, 'epoch': 2} {'type': 'loss', 'content': 0.1930367350578308, 'timestamp': '2025-09-30 22:27:43.150254', 'step': 12661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.183460', 'step': 12661, 'epoch': 2} {'type': 'loss', 'content': 0.14442098140716553, 'timestamp': '2025-09-30 22:27:43.186392', 'step': 12662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:27:43.218536', 'step': 12662, 'epoch': 2} {'type': 'loss', 'content': 0.08709081262350082, 'timestamp': '2025-09-30 22:27:43.227691', 'step': 12663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:43.270901', 'step': 12663, 'epoch': 2} {'type': 'loss', 'content': 0.07811953872442245, 'timestamp': '2025-09-30 22:27:43.298977', 'step': 12664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.328906', 'step': 12664, 'epoch': 2} {'type': 'loss', 'content': 0.07959912717342377, 'timestamp': '2025-09-30 22:27:43.336051', 'step': 12665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:43.368533', 'step': 12665, 'epoch': 2} {'type': 'loss', 'content': 0.08802756667137146, 'timestamp': '2025-09-30 22:27:43.371410', 'step': 12666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:43.402443', 'step': 12666, 'epoch': 2} {'type': 'loss', 'content': 0.09676223248243332, 'timestamp': '2025-09-30 22:27:43.405128', 'step': 12667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:43.437834', 'step': 12667, 'epoch': 2} {'type': 'loss', 'content': 0.08463400602340698, 'timestamp': '2025-09-30 22:27:43.462526', 'step': 12668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.496687', 'step': 12668, 'epoch': 2} {'type': 'loss', 'content': 0.1411091536283493, 'timestamp': '2025-09-30 22:27:43.501509', 'step': 12669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.535278', 'step': 12669, 'epoch': 2} {'type': 'loss', 'content': 0.13760264217853546, 'timestamp': '2025-09-30 22:27:43.547210', 'step': 12670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:43.595575', 'step': 12670, 'epoch': 2} {'type': 'loss', 'content': 0.04923465475440025, 'timestamp': '2025-09-30 22:27:43.598951', 'step': 12671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.630329', 'step': 12671, 'epoch': 2} {'type': 'loss', 'content': 0.08067496865987778, 'timestamp': '2025-09-30 22:27:43.661862', 'step': 12672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.694021', 'step': 12672, 'epoch': 2} {'type': 'loss', 'content': 0.04959650710225105, 'timestamp': '2025-09-30 22:27:43.697454', 'step': 12673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:43.728562', 'step': 12673, 'epoch': 2} {'type': 'loss', 'content': 0.16944631934165955, 'timestamp': '2025-09-30 22:27:43.734543', 'step': 12674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:43.774010', 'step': 12674, 'epoch': 2} {'type': 'loss', 'content': 0.06272812187671661, 'timestamp': '2025-09-30 22:27:43.788824', 'step': 12675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.821186', 'step': 12675, 'epoch': 2} {'type': 'loss', 'content': 0.10334441065788269, 'timestamp': '2025-09-30 22:27:43.846153', 'step': 12676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.888713', 'step': 12676, 'epoch': 2} {'type': 'loss', 'content': 0.09718332439661026, 'timestamp': '2025-09-30 22:27:43.893045', 'step': 12677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:43.926786', 'step': 12677, 'epoch': 2} {'type': 'loss', 'content': 0.15441055595874786, 'timestamp': '2025-09-30 22:27:43.930408', 'step': 12678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:43.965497', 'step': 12678, 'epoch': 2} {'type': 'loss', 'content': 0.09076084196567535, 'timestamp': '2025-09-30 22:27:43.968989', 'step': 12679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:44.001105', 'step': 12679, 'epoch': 2} {'type': 'loss', 'content': 0.10949717462062836, 'timestamp': '2025-09-30 22:27:44.032448', 'step': 12680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:44.063803', 'step': 12680, 'epoch': 2} {'type': 'loss', 'content': 0.22035224735736847, 'timestamp': '2025-09-30 22:27:44.067845', 'step': 12681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:44.100579', 'step': 12681, 'epoch': 2} {'type': 'loss', 'content': 0.17200744152069092, 'timestamp': '2025-09-30 22:27:44.110093', 'step': 12682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.141630', 'step': 12682, 'epoch': 2} {'type': 'loss', 'content': 0.034958504140377045, 'timestamp': '2025-09-30 22:27:44.146921', 'step': 12683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.178274', 'step': 12683, 'epoch': 2} {'type': 'loss', 'content': 0.09902077168226242, 'timestamp': '2025-09-30 22:27:44.204419', 'step': 12684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:44.246364', 'step': 12684, 'epoch': 2} {'type': 'loss', 'content': 0.06364332139492035, 'timestamp': '2025-09-30 22:27:44.259079', 'step': 12685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.300889', 'step': 12685, 'epoch': 2} {'type': 'loss', 'content': 0.0680052787065506, 'timestamp': '2025-09-30 22:27:44.305902', 'step': 12686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:44.337597', 'step': 12686, 'epoch': 2} {'type': 'loss', 'content': 0.13244405388832092, 'timestamp': '2025-09-30 22:27:44.341839', 'step': 12687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:44.373393', 'step': 12687, 'epoch': 2} {'type': 'loss', 'content': 0.07969633489847183, 'timestamp': '2025-09-30 22:27:44.399133', 'step': 12688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.430527', 'step': 12688, 'epoch': 2} {'type': 'loss', 'content': 0.1546120047569275, 'timestamp': '2025-09-30 22:27:44.443505', 'step': 12689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.475732', 'step': 12689, 'epoch': 2} {'type': 'loss', 'content': 0.11659924685955048, 'timestamp': '2025-09-30 22:27:44.491055', 'step': 12690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:44.532241', 'step': 12690, 'epoch': 2} {'type': 'loss', 'content': 0.06008918955922127, 'timestamp': '2025-09-30 22:27:44.536027', 'step': 12691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:44.566504', 'step': 12691, 'epoch': 2} {'type': 'loss', 'content': 0.03912409022450447, 'timestamp': '2025-09-30 22:27:44.592239', 'step': 12692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.625357', 'step': 12692, 'epoch': 2} {'type': 'loss', 'content': 0.0386066734790802, 'timestamp': '2025-09-30 22:27:44.628996', 'step': 12693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:44.661184', 'step': 12693, 'epoch': 2} {'type': 'loss', 'content': 0.16629719734191895, 'timestamp': '2025-09-30 22:27:44.664621', 'step': 12694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.697133', 'step': 12694, 'epoch': 2} {'type': 'loss', 'content': 0.028444847092032433, 'timestamp': '2025-09-30 22:27:44.701752', 'step': 12695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:44.733329', 'step': 12695, 'epoch': 2} {'type': 'loss', 'content': 0.09047526866197586, 'timestamp': '2025-09-30 22:27:44.759899', 'step': 12696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:44.792346', 'step': 12696, 'epoch': 2} {'type': 'loss', 'content': 0.06700991839170456, 'timestamp': '2025-09-30 22:27:44.796700', 'step': 12697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:44.828809', 'step': 12697, 'epoch': 2} {'type': 'loss', 'content': 0.16701087355613708, 'timestamp': '2025-09-30 22:27:44.832178', 'step': 12698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:44.870196', 'step': 12698, 'epoch': 2} {'type': 'loss', 'content': 0.11752837896347046, 'timestamp': '2025-09-30 22:27:44.885474', 'step': 12699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:44.917227', 'step': 12699, 'epoch': 2} {'type': 'loss', 'content': 0.13393622636795044, 'timestamp': '2025-09-30 22:27:44.943783', 'step': 12700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:44.988397', 'step': 12700, 'epoch': 2} {'type': 'loss', 'content': 0.06713087856769562, 'timestamp': '2025-09-30 22:27:44.991660', 'step': 12701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:45.033551', 'step': 12701, 'epoch': 2} {'type': 'loss', 'content': 0.09482350200414658, 'timestamp': '2025-09-30 22:27:45.036869', 'step': 12702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:45.078183', 'step': 12702, 'epoch': 2} {'type': 'loss', 'content': 0.0836968943476677, 'timestamp': '2025-09-30 22:27:45.091337', 'step': 12703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:45.145283', 'step': 12703, 'epoch': 2} {'type': 'loss', 'content': 0.12747742235660553, 'timestamp': '2025-09-30 22:27:45.171231', 'step': 12704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:45.211855', 'step': 12704, 'epoch': 2} {'type': 'loss', 'content': 0.04126492142677307, 'timestamp': '2025-09-30 22:27:45.217493', 'step': 12705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.251480', 'step': 12705, 'epoch': 2} {'type': 'loss', 'content': 0.1002063900232315, 'timestamp': '2025-09-30 22:27:45.266879', 'step': 12706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:45.309270', 'step': 12706, 'epoch': 2} {'type': 'loss', 'content': 0.1290164589881897, 'timestamp': '2025-09-30 22:27:45.315139', 'step': 12707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:45.365636', 'step': 12707, 'epoch': 2} {'type': 'loss', 'content': 0.09478209167718887, 'timestamp': '2025-09-30 22:27:45.400594', 'step': 12708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.430872', 'step': 12708, 'epoch': 2} {'type': 'loss', 'content': 0.07461550831794739, 'timestamp': '2025-09-30 22:27:45.446603', 'step': 12709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:45.480573', 'step': 12709, 'epoch': 2} {'type': 'loss', 'content': 0.13765186071395874, 'timestamp': '2025-09-30 22:27:45.484021', 'step': 12710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.526157', 'step': 12710, 'epoch': 2} {'type': 'loss', 'content': 0.07521279156208038, 'timestamp': '2025-09-30 22:27:45.541669', 'step': 12711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:45.584582', 'step': 12711, 'epoch': 2} {'type': 'loss', 'content': 0.09881561994552612, 'timestamp': '2025-09-30 22:27:45.613491', 'step': 12712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.648360', 'step': 12712, 'epoch': 2} {'type': 'loss', 'content': 0.13037237524986267, 'timestamp': '2025-09-30 22:27:45.659160', 'step': 12713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.697416', 'step': 12713, 'epoch': 2} {'type': 'loss', 'content': 0.15312638878822327, 'timestamp': '2025-09-30 22:27:45.711911', 'step': 12714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:45.752534', 'step': 12714, 'epoch': 2} {'type': 'loss', 'content': 0.17766200006008148, 'timestamp': '2025-09-30 22:27:45.757505', 'step': 12715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.789569', 'step': 12715, 'epoch': 2} {'type': 'loss', 'content': 0.10635457187891006, 'timestamp': '2025-09-30 22:27:45.825042', 'step': 12716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:45.858467', 'step': 12716, 'epoch': 2} {'type': 'loss', 'content': 0.10835704952478409, 'timestamp': '2025-09-30 22:27:45.870289', 'step': 12717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:45.906140', 'step': 12717, 'epoch': 2} {'type': 'loss', 'content': 0.07986404001712799, 'timestamp': '2025-09-30 22:27:45.918673', 'step': 12718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:45.955554', 'step': 12718, 'epoch': 2} {'type': 'loss', 'content': 0.041947685182094574, 'timestamp': '2025-09-30 22:27:45.959322', 'step': 12719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:45.990276', 'step': 12719, 'epoch': 2} {'type': 'loss', 'content': 0.05109064653515816, 'timestamp': '2025-09-30 22:27:46.015435', 'step': 12720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:46.057733', 'step': 12720, 'epoch': 2} {'type': 'loss', 'content': 0.06849491596221924, 'timestamp': '2025-09-30 22:27:46.064269', 'step': 12721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:46.101781', 'step': 12721, 'epoch': 2} {'type': 'loss', 'content': 0.07513447850942612, 'timestamp': '2025-09-30 22:27:46.111936', 'step': 12722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:46.147986', 'step': 12722, 'epoch': 2} {'type': 'loss', 'content': 0.04862860217690468, 'timestamp': '2025-09-30 22:27:46.154691', 'step': 12723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:46.186672', 'step': 12723, 'epoch': 2} {'type': 'loss', 'content': 0.06979586184024811, 'timestamp': '2025-09-30 22:27:46.220542', 'step': 12724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:46.262812', 'step': 12724, 'epoch': 2} {'type': 'loss', 'content': 0.16482286155223846, 'timestamp': '2025-09-30 22:27:46.278345', 'step': 12725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:46.317108', 'step': 12725, 'epoch': 2} {'type': 'loss', 'content': 0.11891363561153412, 'timestamp': '2025-09-30 22:27:46.333666', 'step': 12726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:46.374086', 'step': 12726, 'epoch': 2} {'type': 'loss', 'content': 0.10112959891557693, 'timestamp': '2025-09-30 22:27:46.379728', 'step': 12727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:46.411269', 'step': 12727, 'epoch': 2} {'type': 'loss', 'content': 0.054123133420944214, 'timestamp': '2025-09-30 22:27:46.440992', 'step': 12728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:46.483681', 'step': 12728, 'epoch': 2} {'type': 'loss', 'content': 0.12492498010396957, 'timestamp': '2025-09-30 22:27:46.497791', 'step': 12729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:46.532900', 'step': 12729, 'epoch': 2} {'type': 'loss', 'content': 0.12155735492706299, 'timestamp': '2025-09-30 22:27:46.538712', 'step': 12730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:46.572432', 'step': 12730, 'epoch': 2} {'type': 'loss', 'content': 0.13884860277175903, 'timestamp': '2025-09-30 22:27:46.575601', 'step': 12731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:46.607983', 'step': 12731, 'epoch': 2} {'type': 'loss', 'content': 0.038254182785749435, 'timestamp': '2025-09-30 22:27:46.634088', 'step': 12732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:46.675714', 'step': 12732, 'epoch': 2} {'type': 'loss', 'content': 0.1629209965467453, 'timestamp': '2025-09-30 22:27:46.681365', 'step': 12733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:46.725014', 'step': 12733, 'epoch': 2} {'type': 'loss', 'content': 0.18900799751281738, 'timestamp': '2025-09-30 22:27:46.739540', 'step': 12734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:46.787874', 'step': 12734, 'epoch': 2} {'type': 'loss', 'content': 0.15119311213493347, 'timestamp': '2025-09-30 22:27:46.800886', 'step': 12735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:46.842057', 'step': 12735, 'epoch': 2} {'type': 'loss', 'content': 0.11050423979759216, 'timestamp': '2025-09-30 22:27:46.866689', 'step': 12736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:46.898324', 'step': 12736, 'epoch': 2} {'type': 'loss', 'content': 0.04660904407501221, 'timestamp': '2025-09-30 22:27:46.902120', 'step': 12737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:46.933536', 'step': 12737, 'epoch': 2} {'type': 'loss', 'content': 0.10534830391407013, 'timestamp': '2025-09-30 22:27:46.937129', 'step': 12738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:46.975330', 'step': 12738, 'epoch': 2} {'type': 'loss', 'content': 0.18320758640766144, 'timestamp': '2025-09-30 22:27:46.978596', 'step': 12739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.009510', 'step': 12739, 'epoch': 2} {'type': 'loss', 'content': 0.07132424414157867, 'timestamp': '2025-09-30 22:27:47.040409', 'step': 12740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.079318', 'step': 12740, 'epoch': 2} {'type': 'loss', 'content': 0.1677345484495163, 'timestamp': '2025-09-30 22:27:47.082038', 'step': 12741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:47.113756', 'step': 12741, 'epoch': 2} {'type': 'loss', 'content': 0.16232801973819733, 'timestamp': '2025-09-30 22:27:47.117661', 'step': 12742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:47.150495', 'step': 12742, 'epoch': 2} {'type': 'loss', 'content': 0.09307476878166199, 'timestamp': '2025-09-30 22:27:47.154368', 'step': 12743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:47.186608', 'step': 12743, 'epoch': 2} {'type': 'loss', 'content': 0.09304234385490417, 'timestamp': '2025-09-30 22:27:47.211892', 'step': 12744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.242888', 'step': 12744, 'epoch': 2} {'type': 'loss', 'content': 0.1385492980480194, 'timestamp': '2025-09-30 22:27:47.245725', 'step': 12745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:47.277860', 'step': 12745, 'epoch': 2} {'type': 'loss', 'content': 0.23815584182739258, 'timestamp': '2025-09-30 22:27:47.281302', 'step': 12746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:47.315404', 'step': 12746, 'epoch': 2} {'type': 'loss', 'content': 0.08461307734251022, 'timestamp': '2025-09-30 22:27:47.330752', 'step': 12747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.362100', 'step': 12747, 'epoch': 2} {'type': 'loss', 'content': 0.08264101296663284, 'timestamp': '2025-09-30 22:27:47.392888', 'step': 12748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:47.424260', 'step': 12748, 'epoch': 2} {'type': 'loss', 'content': 0.17162780463695526, 'timestamp': '2025-09-30 22:27:47.426879', 'step': 12749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:47.458140', 'step': 12749, 'epoch': 2} {'type': 'loss', 'content': 0.10584039986133575, 'timestamp': '2025-09-30 22:27:47.460946', 'step': 12750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:47.493079', 'step': 12750, 'epoch': 2} {'type': 'loss', 'content': 0.16045987606048584, 'timestamp': '2025-09-30 22:27:47.496118', 'step': 12751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:47.527518', 'step': 12751, 'epoch': 2} {'type': 'loss', 'content': 0.11456481367349625, 'timestamp': '2025-09-30 22:27:47.553850', 'step': 12752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:47.597388', 'step': 12752, 'epoch': 2} {'type': 'loss', 'content': 0.1765352189540863, 'timestamp': '2025-09-30 22:27:47.600650', 'step': 12753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.633873', 'step': 12753, 'epoch': 2} {'type': 'loss', 'content': 0.0677330270409584, 'timestamp': '2025-09-30 22:27:47.636059', 'step': 12754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:47.673833', 'step': 12754, 'epoch': 2} {'type': 'loss', 'content': 0.06982878595590591, 'timestamp': '2025-09-30 22:27:47.685068', 'step': 12755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:47.723354', 'step': 12755, 'epoch': 2} {'type': 'loss', 'content': 0.09043580293655396, 'timestamp': '2025-09-30 22:27:47.748132', 'step': 12756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.779749', 'step': 12756, 'epoch': 2} {'type': 'loss', 'content': 0.15976738929748535, 'timestamp': '2025-09-30 22:27:47.784241', 'step': 12757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.817315', 'step': 12757, 'epoch': 2} {'type': 'loss', 'content': 0.10790415108203888, 'timestamp': '2025-09-30 22:27:47.821306', 'step': 12758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:47.855115', 'step': 12758, 'epoch': 2} {'type': 'loss', 'content': 0.12405353784561157, 'timestamp': '2025-09-30 22:27:47.859330', 'step': 12759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:47.890255', 'step': 12759, 'epoch': 2} {'type': 'loss', 'content': 0.12382979691028595, 'timestamp': '2025-09-30 22:27:47.914114', 'step': 12760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:47.946851', 'step': 12760, 'epoch': 2} {'type': 'loss', 'content': 0.10423154383897781, 'timestamp': '2025-09-30 22:27:47.950414', 'step': 12761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:47.982406', 'step': 12761, 'epoch': 2} {'type': 'loss', 'content': 0.10154534131288528, 'timestamp': '2025-09-30 22:27:47.986265', 'step': 12762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.025715', 'step': 12762, 'epoch': 2} {'type': 'loss', 'content': 0.16187931597232819, 'timestamp': '2025-09-30 22:27:48.028829', 'step': 12763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.060367', 'step': 12763, 'epoch': 2} {'type': 'loss', 'content': 0.04128752648830414, 'timestamp': '2025-09-30 22:27:48.091985', 'step': 12764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.132263', 'step': 12764, 'epoch': 2} {'type': 'loss', 'content': 0.055031903088092804, 'timestamp': '2025-09-30 22:27:48.135411', 'step': 12765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:48.166813', 'step': 12765, 'epoch': 2} {'type': 'loss', 'content': 0.09631448984146118, 'timestamp': '2025-09-30 22:27:48.169582', 'step': 12766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.202516', 'step': 12766, 'epoch': 2} {'type': 'loss', 'content': 0.11024455726146698, 'timestamp': '2025-09-30 22:27:48.206597', 'step': 12767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:48.238332', 'step': 12767, 'epoch': 2} {'type': 'loss', 'content': 0.11387566477060318, 'timestamp': '2025-09-30 22:27:48.263402', 'step': 12768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.294769', 'step': 12768, 'epoch': 2} {'type': 'loss', 'content': 0.11552608758211136, 'timestamp': '2025-09-30 22:27:48.298161', 'step': 12769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:48.332235', 'step': 12769, 'epoch': 2} {'type': 'loss', 'content': 0.11272530257701874, 'timestamp': '2025-09-30 22:27:48.336582', 'step': 12770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.369547', 'step': 12770, 'epoch': 2} {'type': 'loss', 'content': 0.05092674866318703, 'timestamp': '2025-09-30 22:27:48.374462', 'step': 12771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:48.406756', 'step': 12771, 'epoch': 2} {'type': 'loss', 'content': 0.08528085798025131, 'timestamp': '2025-09-30 22:27:48.430993', 'step': 12772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:48.476001', 'step': 12772, 'epoch': 2} {'type': 'loss', 'content': 0.14451143145561218, 'timestamp': '2025-09-30 22:27:48.479356', 'step': 12773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.516889', 'step': 12773, 'epoch': 2} {'type': 'loss', 'content': 0.23567114770412445, 'timestamp': '2025-09-30 22:27:48.519462', 'step': 12774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:48.552086', 'step': 12774, 'epoch': 2} {'type': 'loss', 'content': 0.10268612951040268, 'timestamp': '2025-09-30 22:27:48.554551', 'step': 12775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:48.586279', 'step': 12775, 'epoch': 2} {'type': 'loss', 'content': 0.1465689092874527, 'timestamp': '2025-09-30 22:27:48.618539', 'step': 12776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:48.651079', 'step': 12776, 'epoch': 2} {'type': 'loss', 'content': 0.07127130776643753, 'timestamp': '2025-09-30 22:27:48.653816', 'step': 12777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.685732', 'step': 12777, 'epoch': 2} {'type': 'loss', 'content': 0.06176826357841492, 'timestamp': '2025-09-30 22:27:48.689596', 'step': 12778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.721101', 'step': 12778, 'epoch': 2} {'type': 'loss', 'content': 0.079253651201725, 'timestamp': '2025-09-30 22:27:48.731058', 'step': 12779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:48.780144', 'step': 12779, 'epoch': 2} {'type': 'loss', 'content': 0.11553030461072922, 'timestamp': '2025-09-30 22:27:48.805085', 'step': 12780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:48.838455', 'step': 12780, 'epoch': 2} {'type': 'loss', 'content': 0.09675823897123337, 'timestamp': '2025-09-30 22:27:48.842053', 'step': 12781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:48.873795', 'step': 12781, 'epoch': 2} {'type': 'loss', 'content': 0.07617905735969543, 'timestamp': '2025-09-30 22:27:48.877371', 'step': 12782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:48.911422', 'step': 12782, 'epoch': 2} {'type': 'loss', 'content': 0.1067836657166481, 'timestamp': '2025-09-30 22:27:48.922300', 'step': 12783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:48.953793', 'step': 12783, 'epoch': 2} {'type': 'loss', 'content': 0.21276643872261047, 'timestamp': '2025-09-30 22:27:48.979292', 'step': 12784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:49.011084', 'step': 12784, 'epoch': 2} {'type': 'loss', 'content': 0.08921822160482407, 'timestamp': '2025-09-30 22:27:49.013716', 'step': 12785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.045188', 'step': 12785, 'epoch': 2} {'type': 'loss', 'content': 0.07013433426618576, 'timestamp': '2025-09-30 22:27:49.050925', 'step': 12786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.084165', 'step': 12786, 'epoch': 2} {'type': 'loss', 'content': 0.06257926672697067, 'timestamp': '2025-09-30 22:27:49.090616', 'step': 12787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.121354', 'step': 12787, 'epoch': 2} {'type': 'loss', 'content': 0.08910386264324188, 'timestamp': '2025-09-30 22:27:49.158379', 'step': 12788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:49.192416', 'step': 12788, 'epoch': 2} {'type': 'loss', 'content': 0.11686571687459946, 'timestamp': '2025-09-30 22:27:49.197120', 'step': 12789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:27:49.238696', 'step': 12789, 'epoch': 2} {'type': 'loss', 'content': 0.07839133590459824, 'timestamp': '2025-09-30 22:27:49.243365', 'step': 12790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.276671', 'step': 12790, 'epoch': 2} {'type': 'loss', 'content': 0.10902916640043259, 'timestamp': '2025-09-30 22:27:49.279117', 'step': 12791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:49.309912', 'step': 12791, 'epoch': 2} {'type': 'loss', 'content': 0.1522074043750763, 'timestamp': '2025-09-30 22:27:49.334234', 'step': 12792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.365846', 'step': 12792, 'epoch': 2} {'type': 'loss', 'content': 0.12693840265274048, 'timestamp': '2025-09-30 22:27:49.369604', 'step': 12793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:49.400671', 'step': 12793, 'epoch': 2} {'type': 'loss', 'content': 0.056021206080913544, 'timestamp': '2025-09-30 22:27:49.404641', 'step': 12794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.452273', 'step': 12794, 'epoch': 2} {'type': 'loss', 'content': 0.17875301837921143, 'timestamp': '2025-09-30 22:27:49.456088', 'step': 12795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:49.495352', 'step': 12795, 'epoch': 2} {'type': 'loss', 'content': 0.12845855951309204, 'timestamp': '2025-09-30 22:27:49.520088', 'step': 12796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.551798', 'step': 12796, 'epoch': 2} {'type': 'loss', 'content': 0.12818756699562073, 'timestamp': '2025-09-30 22:27:49.555357', 'step': 12797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.586552', 'step': 12797, 'epoch': 2} {'type': 'loss', 'content': 0.11330190300941467, 'timestamp': '2025-09-30 22:27:49.596899', 'step': 12798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:49.628848', 'step': 12798, 'epoch': 2} {'type': 'loss', 'content': 0.13142429292201996, 'timestamp': '2025-09-30 22:27:49.633665', 'step': 12799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:49.665188', 'step': 12799, 'epoch': 2} {'type': 'loss', 'content': 0.08171126246452332, 'timestamp': '2025-09-30 22:27:49.692446', 'step': 12800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.727381', 'step': 12800, 'epoch': 2} {'type': 'loss', 'content': 0.05568879842758179, 'timestamp': '2025-09-30 22:27:49.730262', 'step': 12801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:49.762256', 'step': 12801, 'epoch': 2} {'type': 'loss', 'content': 0.012370125390589237, 'timestamp': '2025-09-30 22:27:49.766836', 'step': 12802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:49.799267', 'step': 12802, 'epoch': 2} {'type': 'loss', 'content': 0.15890902280807495, 'timestamp': '2025-09-30 22:27:49.804332', 'step': 12803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:49.835064', 'step': 12803, 'epoch': 2} {'type': 'loss', 'content': 0.1316298246383667, 'timestamp': '2025-09-30 22:27:49.860745', 'step': 12804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:49.893286', 'step': 12804, 'epoch': 2} {'type': 'loss', 'content': 0.08283130079507828, 'timestamp': '2025-09-30 22:27:49.896903', 'step': 12805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:49.929276', 'step': 12805, 'epoch': 2} {'type': 'loss', 'content': 0.13350851833820343, 'timestamp': '2025-09-30 22:27:49.932588', 'step': 12806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:49.963433', 'step': 12806, 'epoch': 2} {'type': 'loss', 'content': 0.16380485892295837, 'timestamp': '2025-09-30 22:27:49.966632', 'step': 12807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:49.998614', 'step': 12807, 'epoch': 2} {'type': 'loss', 'content': 0.1323171854019165, 'timestamp': '2025-09-30 22:27:50.023401', 'step': 12808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:50.063275', 'step': 12808, 'epoch': 2} {'type': 'loss', 'content': 0.11604294925928116, 'timestamp': '2025-09-30 22:27:50.066884', 'step': 12809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:50.097350', 'step': 12809, 'epoch': 2} {'type': 'loss', 'content': 0.14981409907341003, 'timestamp': '2025-09-30 22:27:50.101574', 'step': 12810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:50.133882', 'step': 12810, 'epoch': 2} {'type': 'loss', 'content': 0.14137333631515503, 'timestamp': '2025-09-30 22:27:50.137964', 'step': 12811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.169618', 'step': 12811, 'epoch': 2} {'type': 'loss', 'content': 0.1548909991979599, 'timestamp': '2025-09-30 22:27:50.195517', 'step': 12812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.227989', 'step': 12812, 'epoch': 2} {'type': 'loss', 'content': 0.0716245099902153, 'timestamp': '2025-09-30 22:27:50.232053', 'step': 12813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:50.266008', 'step': 12813, 'epoch': 2} {'type': 'loss', 'content': 0.09878401458263397, 'timestamp': '2025-09-30 22:27:50.271248', 'step': 12814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:50.303802', 'step': 12814, 'epoch': 2} {'type': 'loss', 'content': 0.1223483681678772, 'timestamp': '2025-09-30 22:27:50.308436', 'step': 12815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:50.341627', 'step': 12815, 'epoch': 2} {'type': 'loss', 'content': 0.12213344871997833, 'timestamp': '2025-09-30 22:27:50.366171', 'step': 12816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.405122', 'step': 12816, 'epoch': 2} {'type': 'loss', 'content': 0.11341182887554169, 'timestamp': '2025-09-30 22:27:50.408936', 'step': 12817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:50.442953', 'step': 12817, 'epoch': 2} {'type': 'loss', 'content': 0.07825309783220291, 'timestamp': '2025-09-30 22:27:50.447343', 'step': 12818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.480263', 'step': 12818, 'epoch': 2} {'type': 'loss', 'content': 0.052382927387952805, 'timestamp': '2025-09-30 22:27:50.482614', 'step': 12819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:50.513164', 'step': 12819, 'epoch': 2} {'type': 'loss', 'content': 0.11390051990747452, 'timestamp': '2025-09-30 22:27:50.539559', 'step': 12820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.570387', 'step': 12820, 'epoch': 2} {'type': 'loss', 'content': 0.11229823529720306, 'timestamp': '2025-09-30 22:27:50.573118', 'step': 12821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.604083', 'step': 12821, 'epoch': 2} {'type': 'loss', 'content': 0.08514243364334106, 'timestamp': '2025-09-30 22:27:50.606603', 'step': 12822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.651205', 'step': 12822, 'epoch': 2} {'type': 'loss', 'content': 0.11698491126298904, 'timestamp': '2025-09-30 22:27:50.654355', 'step': 12823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.685753', 'step': 12823, 'epoch': 2} {'type': 'loss', 'content': 0.09734873473644257, 'timestamp': '2025-09-30 22:27:50.712254', 'step': 12824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:50.753588', 'step': 12824, 'epoch': 2} {'type': 'loss', 'content': 0.08693034946918488, 'timestamp': '2025-09-30 22:27:50.760422', 'step': 12825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:50.793542', 'step': 12825, 'epoch': 2} {'type': 'loss', 'content': 0.1303822547197342, 'timestamp': '2025-09-30 22:27:50.797540', 'step': 12826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.832326', 'step': 12826, 'epoch': 2} {'type': 'loss', 'content': 0.05321153253316879, 'timestamp': '2025-09-30 22:27:50.835884', 'step': 12827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:50.868491', 'step': 12827, 'epoch': 2} {'type': 'loss', 'content': 0.06025339290499687, 'timestamp': '2025-09-30 22:27:50.893284', 'step': 12828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:50.924024', 'step': 12828, 'epoch': 2} {'type': 'loss', 'content': 0.1243935152888298, 'timestamp': '2025-09-30 22:27:50.927165', 'step': 12829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:50.961463', 'step': 12829, 'epoch': 2} {'type': 'loss', 'content': 0.08953724801540375, 'timestamp': '2025-09-30 22:27:50.975704', 'step': 12830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:51.008860', 'step': 12830, 'epoch': 2} {'type': 'loss', 'content': 0.0975675880908966, 'timestamp': '2025-09-30 22:27:51.013902', 'step': 12831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.045773', 'step': 12831, 'epoch': 2} {'type': 'loss', 'content': 0.15355317294597626, 'timestamp': '2025-09-30 22:27:51.069811', 'step': 12832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:51.101844', 'step': 12832, 'epoch': 2} {'type': 'loss', 'content': 0.07242029160261154, 'timestamp': '2025-09-30 22:27:51.106159', 'step': 12833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.137091', 'step': 12833, 'epoch': 2} {'type': 'loss', 'content': 0.054181717336177826, 'timestamp': '2025-09-30 22:27:51.139898', 'step': 12834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:51.171324', 'step': 12834, 'epoch': 2} {'type': 'loss', 'content': 0.062122978270053864, 'timestamp': '2025-09-30 22:27:51.173356', 'step': 12835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.203587', 'step': 12835, 'epoch': 2} {'type': 'loss', 'content': 0.06776545941829681, 'timestamp': '2025-09-30 22:27:51.227704', 'step': 12836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.259832', 'step': 12836, 'epoch': 2} {'type': 'loss', 'content': 0.07222754508256912, 'timestamp': '2025-09-30 22:27:51.262129', 'step': 12837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.293262', 'step': 12837, 'epoch': 2} {'type': 'loss', 'content': 0.059899091720581055, 'timestamp': '2025-09-30 22:27:51.298417', 'step': 12838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.330758', 'step': 12838, 'epoch': 2} {'type': 'loss', 'content': 0.1427835375070572, 'timestamp': '2025-09-30 22:27:51.335706', 'step': 12839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.367986', 'step': 12839, 'epoch': 2} {'type': 'loss', 'content': 0.1236831471323967, 'timestamp': '2025-09-30 22:27:51.392903', 'step': 12840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:51.423823', 'step': 12840, 'epoch': 2} {'type': 'loss', 'content': 0.09609554708003998, 'timestamp': '2025-09-30 22:27:51.427062', 'step': 12841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.471432', 'step': 12841, 'epoch': 2} {'type': 'loss', 'content': 0.07925336807966232, 'timestamp': '2025-09-30 22:27:51.475074', 'step': 12842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.510833', 'step': 12842, 'epoch': 2} {'type': 'loss', 'content': 0.04392557218670845, 'timestamp': '2025-09-30 22:27:51.515693', 'step': 12843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.548983', 'step': 12843, 'epoch': 2} {'type': 'loss', 'content': 0.12036730349063873, 'timestamp': '2025-09-30 22:27:51.573593', 'step': 12844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:51.612804', 'step': 12844, 'epoch': 2} {'type': 'loss', 'content': 0.09776347130537033, 'timestamp': '2025-09-30 22:27:51.616766', 'step': 12845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:51.650385', 'step': 12845, 'epoch': 2} {'type': 'loss', 'content': 0.05149436742067337, 'timestamp': '2025-09-30 22:27:51.653788', 'step': 12846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.692620', 'step': 12846, 'epoch': 2} {'type': 'loss', 'content': 0.08666066825389862, 'timestamp': '2025-09-30 22:27:51.696126', 'step': 12847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:51.730072', 'step': 12847, 'epoch': 2} {'type': 'loss', 'content': 0.16889294981956482, 'timestamp': '2025-09-30 22:27:51.753743', 'step': 12848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:51.785855', 'step': 12848, 'epoch': 2} {'type': 'loss', 'content': 0.09111102670431137, 'timestamp': '2025-09-30 22:27:51.789468', 'step': 12849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.822051', 'step': 12849, 'epoch': 2} {'type': 'loss', 'content': 0.16830475628376007, 'timestamp': '2025-09-30 22:27:51.825364', 'step': 12850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:51.856064', 'step': 12850, 'epoch': 2} {'type': 'loss', 'content': 0.09049421548843384, 'timestamp': '2025-09-30 22:27:51.862217', 'step': 12851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.892816', 'step': 12851, 'epoch': 2} {'type': 'loss', 'content': 0.21008798480033875, 'timestamp': '2025-09-30 22:27:51.916850', 'step': 12852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:51.948053', 'step': 12852, 'epoch': 2} {'type': 'loss', 'content': 0.0497090145945549, 'timestamp': '2025-09-30 22:27:51.960432', 'step': 12853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:51.992730', 'step': 12853, 'epoch': 2} {'type': 'loss', 'content': 0.11264065653085709, 'timestamp': '2025-09-30 22:27:51.996124', 'step': 12854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.025782', 'step': 12854, 'epoch': 2} {'type': 'loss', 'content': 0.13098342716693878, 'timestamp': '2025-09-30 22:27:52.029676', 'step': 12855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:52.072194', 'step': 12855, 'epoch': 2} {'type': 'loss', 'content': 0.1262393295764923, 'timestamp': '2025-09-30 22:27:52.096347', 'step': 12856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.127271', 'step': 12856, 'epoch': 2} {'type': 'loss', 'content': 0.08889715373516083, 'timestamp': '2025-09-30 22:27:52.130217', 'step': 12857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.161007', 'step': 12857, 'epoch': 2} {'type': 'loss', 'content': 0.153765007853508, 'timestamp': '2025-09-30 22:27:52.164537', 'step': 12858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:52.195637', 'step': 12858, 'epoch': 2} {'type': 'loss', 'content': 0.14753009378910065, 'timestamp': '2025-09-30 22:27:52.200619', 'step': 12859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:52.236819', 'step': 12859, 'epoch': 2} {'type': 'loss', 'content': 0.07879497110843658, 'timestamp': '2025-09-30 22:27:52.261904', 'step': 12860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.294802', 'step': 12860, 'epoch': 2} {'type': 'loss', 'content': 0.060043320059776306, 'timestamp': '2025-09-30 22:27:52.297056', 'step': 12861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:52.327610', 'step': 12861, 'epoch': 2} {'type': 'loss', 'content': 0.09056573361158371, 'timestamp': '2025-09-30 22:27:52.332419', 'step': 12862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:52.365136', 'step': 12862, 'epoch': 2} {'type': 'loss', 'content': 0.1214483380317688, 'timestamp': '2025-09-30 22:27:52.369698', 'step': 12863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:52.400475', 'step': 12863, 'epoch': 2} {'type': 'loss', 'content': 0.05595281720161438, 'timestamp': '2025-09-30 22:27:52.425312', 'step': 12864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.457279', 'step': 12864, 'epoch': 2} {'type': 'loss', 'content': 0.04410354048013687, 'timestamp': '2025-09-30 22:27:52.459434', 'step': 12865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.492410', 'step': 12865, 'epoch': 2} {'type': 'loss', 'content': 0.11675477772951126, 'timestamp': '2025-09-30 22:27:52.495242', 'step': 12866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.526423', 'step': 12866, 'epoch': 2} {'type': 'loss', 'content': 0.09740975499153137, 'timestamp': '2025-09-30 22:27:52.530740', 'step': 12867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:52.566942', 'step': 12867, 'epoch': 2} {'type': 'loss', 'content': 0.09936775267124176, 'timestamp': '2025-09-30 22:27:52.592040', 'step': 12868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:52.624197', 'step': 12868, 'epoch': 2} {'type': 'loss', 'content': 0.053477510809898376, 'timestamp': '2025-09-30 22:27:52.627395', 'step': 12869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:52.669126', 'step': 12869, 'epoch': 2} {'type': 'loss', 'content': 0.10987813025712967, 'timestamp': '2025-09-30 22:27:52.673954', 'step': 12870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.706733', 'step': 12870, 'epoch': 2} {'type': 'loss', 'content': 0.12444201856851578, 'timestamp': '2025-09-30 22:27:52.710232', 'step': 12871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:52.744088', 'step': 12871, 'epoch': 2} {'type': 'loss', 'content': 0.1429671347141266, 'timestamp': '2025-09-30 22:27:52.768893', 'step': 12872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.806100', 'step': 12872, 'epoch': 2} {'type': 'loss', 'content': 0.08108565211296082, 'timestamp': '2025-09-30 22:27:52.811210', 'step': 12873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:52.846173', 'step': 12873, 'epoch': 2} {'type': 'loss', 'content': 0.0503951720893383, 'timestamp': '2025-09-30 22:27:52.859010', 'step': 12874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.891234', 'step': 12874, 'epoch': 2} {'type': 'loss', 'content': 0.112603098154068, 'timestamp': '2025-09-30 22:27:52.894270', 'step': 12875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:52.926263', 'step': 12875, 'epoch': 2} {'type': 'loss', 'content': 0.18333905935287476, 'timestamp': '2025-09-30 22:27:52.951638', 'step': 12876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:52.983211', 'step': 12876, 'epoch': 2} {'type': 'loss', 'content': 0.10236562788486481, 'timestamp': '2025-09-30 22:27:52.986566', 'step': 12877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:53.017463', 'step': 12877, 'epoch': 2} {'type': 'loss', 'content': 0.15331465005874634, 'timestamp': '2025-09-30 22:27:53.020829', 'step': 12878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.051436', 'step': 12878, 'epoch': 2} {'type': 'loss', 'content': 0.050027016550302505, 'timestamp': '2025-09-30 22:27:53.055991', 'step': 12879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:53.091472', 'step': 12879, 'epoch': 2} {'type': 'loss', 'content': 0.0672980323433876, 'timestamp': '2025-09-30 22:27:53.117606', 'step': 12880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.158394', 'step': 12880, 'epoch': 2} {'type': 'loss', 'content': 0.0725020319223404, 'timestamp': '2025-09-30 22:27:53.162342', 'step': 12881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.195808', 'step': 12881, 'epoch': 2} {'type': 'loss', 'content': 0.1495322436094284, 'timestamp': '2025-09-30 22:27:53.208755', 'step': 12882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.240562', 'step': 12882, 'epoch': 2} {'type': 'loss', 'content': 0.03944261372089386, 'timestamp': '2025-09-30 22:27:53.245861', 'step': 12883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.278603', 'step': 12883, 'epoch': 2} {'type': 'loss', 'content': 0.10853464156389236, 'timestamp': '2025-09-30 22:27:53.302981', 'step': 12884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:53.334169', 'step': 12884, 'epoch': 2} {'type': 'loss', 'content': 0.12146899849176407, 'timestamp': '2025-09-30 22:27:53.344283', 'step': 12885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.382602', 'step': 12885, 'epoch': 2} {'type': 'loss', 'content': 0.0704653263092041, 'timestamp': '2025-09-30 22:27:53.388370', 'step': 12886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:53.433121', 'step': 12886, 'epoch': 2} {'type': 'loss', 'content': 0.07640737295150757, 'timestamp': '2025-09-30 22:27:53.436679', 'step': 12887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:53.468324', 'step': 12887, 'epoch': 2} {'type': 'loss', 'content': 0.16376426815986633, 'timestamp': '2025-09-30 22:27:53.495041', 'step': 12888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:53.528107', 'step': 12888, 'epoch': 2} {'type': 'loss', 'content': 0.049868084490299225, 'timestamp': '2025-09-30 22:27:53.531350', 'step': 12889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.563261', 'step': 12889, 'epoch': 2} {'type': 'loss', 'content': 0.054614048451185226, 'timestamp': '2025-09-30 22:27:53.567626', 'step': 12890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:53.602693', 'step': 12890, 'epoch': 2} {'type': 'loss', 'content': 0.09122271090745926, 'timestamp': '2025-09-30 22:27:53.605246', 'step': 12891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.642728', 'step': 12891, 'epoch': 2} {'type': 'loss', 'content': 0.08919716626405716, 'timestamp': '2025-09-30 22:27:53.669140', 'step': 12892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.702350', 'step': 12892, 'epoch': 2} {'type': 'loss', 'content': 0.09954661130905151, 'timestamp': '2025-09-30 22:27:53.705918', 'step': 12893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:53.736538', 'step': 12893, 'epoch': 2} {'type': 'loss', 'content': 0.12327113747596741, 'timestamp': '2025-09-30 22:27:53.742245', 'step': 12894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.775413', 'step': 12894, 'epoch': 2} {'type': 'loss', 'content': 0.11571533977985382, 'timestamp': '2025-09-30 22:27:53.779929', 'step': 12895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.819405', 'step': 12895, 'epoch': 2} {'type': 'loss', 'content': 0.08774896711111069, 'timestamp': '2025-09-30 22:27:53.844996', 'step': 12896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.877344', 'step': 12896, 'epoch': 2} {'type': 'loss', 'content': 0.08170046657323837, 'timestamp': '2025-09-30 22:27:53.882112', 'step': 12897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:53.920319', 'step': 12897, 'epoch': 2} {'type': 'loss', 'content': 0.10210703313350677, 'timestamp': '2025-09-30 22:27:53.925229', 'step': 12898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.956474', 'step': 12898, 'epoch': 2} {'type': 'loss', 'content': 0.08712976425886154, 'timestamp': '2025-09-30 22:27:53.960483', 'step': 12899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:53.991993', 'step': 12899, 'epoch': 2} {'type': 'loss', 'content': 0.09085765480995178, 'timestamp': '2025-09-30 22:27:54.016981', 'step': 12900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.065162', 'step': 12900, 'epoch': 2} {'type': 'loss', 'content': 0.14583297073841095, 'timestamp': '2025-09-30 22:27:54.079538', 'step': 12901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.111873', 'step': 12901, 'epoch': 2} {'type': 'loss', 'content': 0.09401670843362808, 'timestamp': '2025-09-30 22:27:54.116078', 'step': 12902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:54.158247', 'step': 12902, 'epoch': 2} {'type': 'loss', 'content': 0.09649615734815598, 'timestamp': '2025-09-30 22:27:54.161413', 'step': 12903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:54.193317', 'step': 12903, 'epoch': 2} {'type': 'loss', 'content': 0.09912481904029846, 'timestamp': '2025-09-30 22:27:54.219279', 'step': 12904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:54.251226', 'step': 12904, 'epoch': 2} {'type': 'loss', 'content': 0.07269096374511719, 'timestamp': '2025-09-30 22:27:54.254654', 'step': 12905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.286412', 'step': 12905, 'epoch': 2} {'type': 'loss', 'content': 0.13056285679340363, 'timestamp': '2025-09-30 22:27:54.289252', 'step': 12906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.319739', 'step': 12906, 'epoch': 2} {'type': 'loss', 'content': 0.04524314031004906, 'timestamp': '2025-09-30 22:27:54.323740', 'step': 12907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:54.356314', 'step': 12907, 'epoch': 2} {'type': 'loss', 'content': 0.06333096325397491, 'timestamp': '2025-09-30 22:27:54.381572', 'step': 12908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.412147', 'step': 12908, 'epoch': 2} {'type': 'loss', 'content': 0.10360681265592575, 'timestamp': '2025-09-30 22:27:54.415593', 'step': 12909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:54.447008', 'step': 12909, 'epoch': 2} {'type': 'loss', 'content': 0.09776191413402557, 'timestamp': '2025-09-30 22:27:54.461679', 'step': 12910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:54.491623', 'step': 12910, 'epoch': 2} {'type': 'loss', 'content': 0.17644602060317993, 'timestamp': '2025-09-30 22:27:54.495179', 'step': 12911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:54.526171', 'step': 12911, 'epoch': 2} {'type': 'loss', 'content': 0.06905066221952438, 'timestamp': '2025-09-30 22:27:54.551057', 'step': 12912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:54.581604', 'step': 12912, 'epoch': 2} {'type': 'loss', 'content': 0.07972326129674911, 'timestamp': '2025-09-30 22:27:54.583606', 'step': 12913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.617790', 'step': 12913, 'epoch': 2} {'type': 'loss', 'content': 0.16573761403560638, 'timestamp': '2025-09-30 22:27:54.620263', 'step': 12914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.651590', 'step': 12914, 'epoch': 2} {'type': 'loss', 'content': 0.10286065191030502, 'timestamp': '2025-09-30 22:27:54.655380', 'step': 12915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.686485', 'step': 12915, 'epoch': 2} {'type': 'loss', 'content': 0.0818210244178772, 'timestamp': '2025-09-30 22:27:54.716728', 'step': 12916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:54.760566', 'step': 12916, 'epoch': 2} {'type': 'loss', 'content': 0.10716173052787781, 'timestamp': '2025-09-30 22:27:54.764451', 'step': 12917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:54.800161', 'step': 12917, 'epoch': 2} {'type': 'loss', 'content': 0.17870555818080902, 'timestamp': '2025-09-30 22:27:54.803632', 'step': 12918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.834624', 'step': 12918, 'epoch': 2} {'type': 'loss', 'content': 0.08532094955444336, 'timestamp': '2025-09-30 22:27:54.836952', 'step': 12919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:54.867625', 'step': 12919, 'epoch': 2} {'type': 'loss', 'content': 0.1346539855003357, 'timestamp': '2025-09-30 22:27:54.891821', 'step': 12920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:54.923738', 'step': 12920, 'epoch': 2} {'type': 'loss', 'content': 0.0619339644908905, 'timestamp': '2025-09-30 22:27:54.926659', 'step': 12921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:54.961650', 'step': 12921, 'epoch': 2} {'type': 'loss', 'content': 0.09027149528265, 'timestamp': '2025-09-30 22:27:54.964802', 'step': 12922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:54.995871', 'step': 12922, 'epoch': 2} {'type': 'loss', 'content': 0.05810314416885376, 'timestamp': '2025-09-30 22:27:55.000056', 'step': 12923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:55.033899', 'step': 12923, 'epoch': 2} {'type': 'loss', 'content': 0.041345302015542984, 'timestamp': '2025-09-30 22:27:55.058474', 'step': 12924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.089064', 'step': 12924, 'epoch': 2} {'type': 'loss', 'content': 0.051669906824827194, 'timestamp': '2025-09-30 22:27:55.093686', 'step': 12925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.129739', 'step': 12925, 'epoch': 2} {'type': 'loss', 'content': 0.1329076588153839, 'timestamp': '2025-09-30 22:27:55.132162', 'step': 12926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.163732', 'step': 12926, 'epoch': 2} {'type': 'loss', 'content': 0.10570652782917023, 'timestamp': '2025-09-30 22:27:55.168600', 'step': 12927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.203551', 'step': 12927, 'epoch': 2} {'type': 'loss', 'content': 0.14464086294174194, 'timestamp': '2025-09-30 22:27:55.228690', 'step': 12928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:55.259458', 'step': 12928, 'epoch': 2} {'type': 'loss', 'content': 0.04137028381228447, 'timestamp': '2025-09-30 22:27:55.262253', 'step': 12929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.292977', 'step': 12929, 'epoch': 2} {'type': 'loss', 'content': 0.16740718483924866, 'timestamp': '2025-09-30 22:27:55.296039', 'step': 12930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.328744', 'step': 12930, 'epoch': 2} {'type': 'loss', 'content': 0.17099538445472717, 'timestamp': '2025-09-30 22:27:55.334307', 'step': 12931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.366529', 'step': 12931, 'epoch': 2} {'type': 'loss', 'content': 0.05312478169798851, 'timestamp': '2025-09-30 22:27:55.391795', 'step': 12932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.427154', 'step': 12932, 'epoch': 2} {'type': 'loss', 'content': 0.09805303066968918, 'timestamp': '2025-09-30 22:27:55.429914', 'step': 12933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.460268', 'step': 12933, 'epoch': 2} {'type': 'loss', 'content': 0.12136879563331604, 'timestamp': '2025-09-30 22:27:55.462807', 'step': 12934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:55.494527', 'step': 12934, 'epoch': 2} {'type': 'loss', 'content': 0.08904863148927689, 'timestamp': '2025-09-30 22:27:55.497050', 'step': 12935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.526811', 'step': 12935, 'epoch': 2} {'type': 'loss', 'content': 0.09456782788038254, 'timestamp': '2025-09-30 22:27:55.551906', 'step': 12936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.583597', 'step': 12936, 'epoch': 2} {'type': 'loss', 'content': 0.16223423182964325, 'timestamp': '2025-09-30 22:27:55.587273', 'step': 12937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.618954', 'step': 12937, 'epoch': 2} {'type': 'loss', 'content': 0.13597191870212555, 'timestamp': '2025-09-30 22:27:55.622254', 'step': 12938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:55.654298', 'step': 12938, 'epoch': 2} {'type': 'loss', 'content': 0.12066933512687683, 'timestamp': '2025-09-30 22:27:55.658337', 'step': 12939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:55.691807', 'step': 12939, 'epoch': 2} {'type': 'loss', 'content': 0.09437251091003418, 'timestamp': '2025-09-30 22:27:55.717886', 'step': 12940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:55.748958', 'step': 12940, 'epoch': 2} {'type': 'loss', 'content': 0.16367420554161072, 'timestamp': '2025-09-30 22:27:55.754573', 'step': 12941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.788893', 'step': 12941, 'epoch': 2} {'type': 'loss', 'content': 0.06003672257065773, 'timestamp': '2025-09-30 22:27:55.799148', 'step': 12942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.832182', 'step': 12942, 'epoch': 2} {'type': 'loss', 'content': 0.0816347524523735, 'timestamp': '2025-09-30 22:27:55.835667', 'step': 12943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.877308', 'step': 12943, 'epoch': 2} {'type': 'loss', 'content': 0.1306319236755371, 'timestamp': '2025-09-30 22:27:55.901394', 'step': 12944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:55.933194', 'step': 12944, 'epoch': 2} {'type': 'loss', 'content': 0.08389932662248611, 'timestamp': '2025-09-30 22:27:55.935388', 'step': 12945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:55.976577', 'step': 12945, 'epoch': 2} {'type': 'loss', 'content': 0.16926376521587372, 'timestamp': '2025-09-30 22:27:55.979467', 'step': 12946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:56.014828', 'step': 12946, 'epoch': 2} {'type': 'loss', 'content': 0.05956360325217247, 'timestamp': '2025-09-30 22:27:56.033460', 'step': 12947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.065248', 'step': 12947, 'epoch': 2} {'type': 'loss', 'content': 0.08283015340566635, 'timestamp': '2025-09-30 22:27:56.092070', 'step': 12948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.122504', 'step': 12948, 'epoch': 2} {'type': 'loss', 'content': 0.08034156262874603, 'timestamp': '2025-09-30 22:27:56.128344', 'step': 12949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:56.173173', 'step': 12949, 'epoch': 2} {'type': 'loss', 'content': 0.12710493803024292, 'timestamp': '2025-09-30 22:27:56.176343', 'step': 12950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.209609', 'step': 12950, 'epoch': 2} {'type': 'loss', 'content': 0.03621460869908333, 'timestamp': '2025-09-30 22:27:56.213627', 'step': 12951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.246107', 'step': 12951, 'epoch': 2} {'type': 'loss', 'content': 0.07778199762105942, 'timestamp': '2025-09-30 22:27:56.270800', 'step': 12952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.302102', 'step': 12952, 'epoch': 2} {'type': 'loss', 'content': 0.10793589055538177, 'timestamp': '2025-09-30 22:27:56.305284', 'step': 12953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:56.338046', 'step': 12953, 'epoch': 2} {'type': 'loss', 'content': 0.07299905270338058, 'timestamp': '2025-09-30 22:27:56.341664', 'step': 12954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.379119', 'step': 12954, 'epoch': 2} {'type': 'loss', 'content': 0.1269828826189041, 'timestamp': '2025-09-30 22:27:56.382377', 'step': 12955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.414639', 'step': 12955, 'epoch': 2} {'type': 'loss', 'content': 0.1825108379125595, 'timestamp': '2025-09-30 22:27:56.448160', 'step': 12956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.480958', 'step': 12956, 'epoch': 2} {'type': 'loss', 'content': 0.09840729087591171, 'timestamp': '2025-09-30 22:27:56.483694', 'step': 12957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.518799', 'step': 12957, 'epoch': 2} {'type': 'loss', 'content': 0.05668386444449425, 'timestamp': '2025-09-30 22:27:56.523237', 'step': 12958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.555806', 'step': 12958, 'epoch': 2} {'type': 'loss', 'content': 0.11650101840496063, 'timestamp': '2025-09-30 22:27:56.559254', 'step': 12959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:56.594867', 'step': 12959, 'epoch': 2} {'type': 'loss', 'content': 0.1559101641178131, 'timestamp': '2025-09-30 22:27:56.619193', 'step': 12960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.650834', 'step': 12960, 'epoch': 2} {'type': 'loss', 'content': 0.07818272709846497, 'timestamp': '2025-09-30 22:27:56.655146', 'step': 12961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.688923', 'step': 12961, 'epoch': 2} {'type': 'loss', 'content': 0.1191587820649147, 'timestamp': '2025-09-30 22:27:56.692055', 'step': 12962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.722548', 'step': 12962, 'epoch': 2} {'type': 'loss', 'content': 0.011551677249372005, 'timestamp': '2025-09-30 22:27:56.726383', 'step': 12963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.757918', 'step': 12963, 'epoch': 2} {'type': 'loss', 'content': 0.10477438569068909, 'timestamp': '2025-09-30 22:27:56.782811', 'step': 12964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:56.815098', 'step': 12964, 'epoch': 2} {'type': 'loss', 'content': 0.08897717297077179, 'timestamp': '2025-09-30 22:27:56.829029', 'step': 12965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:27:56.861658', 'step': 12965, 'epoch': 2} {'type': 'loss', 'content': 0.13804589211940765, 'timestamp': '2025-09-30 22:27:56.865953', 'step': 12966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:56.897147', 'step': 12966, 'epoch': 2} {'type': 'loss', 'content': 0.14963480830192566, 'timestamp': '2025-09-30 22:27:56.901284', 'step': 12967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.936292', 'step': 12967, 'epoch': 2} {'type': 'loss', 'content': 0.136593759059906, 'timestamp': '2025-09-30 22:27:56.961396', 'step': 12968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:56.993120', 'step': 12968, 'epoch': 2} {'type': 'loss', 'content': 0.09822843968868256, 'timestamp': '2025-09-30 22:27:56.995256', 'step': 12969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.025637', 'step': 12969, 'epoch': 2} {'type': 'loss', 'content': 0.09234939515590668, 'timestamp': '2025-09-30 22:27:57.028560', 'step': 12970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:57.062590', 'step': 12970, 'epoch': 2} {'type': 'loss', 'content': 0.09712239354848862, 'timestamp': '2025-09-30 22:27:57.064756', 'step': 12971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:57.095593', 'step': 12971, 'epoch': 2} {'type': 'loss', 'content': 0.04710613936185837, 'timestamp': '2025-09-30 22:27:57.121579', 'step': 12972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:57.153567', 'step': 12972, 'epoch': 2} {'type': 'loss', 'content': 0.08521018177270889, 'timestamp': '2025-09-30 22:27:57.156138', 'step': 12973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.188338', 'step': 12973, 'epoch': 2} {'type': 'loss', 'content': 0.0728670209646225, 'timestamp': '2025-09-30 22:27:57.192362', 'step': 12974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:57.225632', 'step': 12974, 'epoch': 2} {'type': 'loss', 'content': 0.10643608868122101, 'timestamp': '2025-09-30 22:27:57.231182', 'step': 12975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.264211', 'step': 12975, 'epoch': 2} {'type': 'loss', 'content': 0.05055484548211098, 'timestamp': '2025-09-30 22:27:57.288547', 'step': 12976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:57.319307', 'step': 12976, 'epoch': 2} {'type': 'loss', 'content': 0.08670992404222488, 'timestamp': '2025-09-30 22:27:57.325809', 'step': 12977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:57.359763', 'step': 12977, 'epoch': 2} {'type': 'loss', 'content': 0.13112440705299377, 'timestamp': '2025-09-30 22:27:57.363820', 'step': 12978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:57.404190', 'step': 12978, 'epoch': 2} {'type': 'loss', 'content': 0.10414481163024902, 'timestamp': '2025-09-30 22:27:57.409446', 'step': 12979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.442997', 'step': 12979, 'epoch': 2} {'type': 'loss', 'content': 0.06917599588632584, 'timestamp': '2025-09-30 22:27:57.468286', 'step': 12980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:57.500396', 'step': 12980, 'epoch': 2} {'type': 'loss', 'content': 0.12169480323791504, 'timestamp': '2025-09-30 22:27:57.502619', 'step': 12981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:57.533962', 'step': 12981, 'epoch': 2} {'type': 'loss', 'content': 0.06824325025081635, 'timestamp': '2025-09-30 22:27:57.536801', 'step': 12982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:27:57.567750', 'step': 12982, 'epoch': 2} {'type': 'loss', 'content': 0.0525788851082325, 'timestamp': '2025-09-30 22:27:57.571102', 'step': 12983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:57.616315', 'step': 12983, 'epoch': 2} {'type': 'loss', 'content': 0.07902053743600845, 'timestamp': '2025-09-30 22:27:57.641075', 'step': 12984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:57.679296', 'step': 12984, 'epoch': 2} {'type': 'loss', 'content': 0.04969341680407524, 'timestamp': '2025-09-30 22:27:57.683237', 'step': 12985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:27:57.714784', 'step': 12985, 'epoch': 2} {'type': 'loss', 'content': 0.16123834252357483, 'timestamp': '2025-09-30 22:27:57.721621', 'step': 12986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:27:57.754409', 'step': 12986, 'epoch': 2} {'type': 'loss', 'content': 0.08009324967861176, 'timestamp': '2025-09-30 22:27:57.759944', 'step': 12987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:27:57.792386', 'step': 12987, 'epoch': 2} {'type': 'loss', 'content': 0.0973992720246315, 'timestamp': '2025-09-30 22:27:57.819501', 'step': 12988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.850177', 'step': 12988, 'epoch': 2} {'type': 'loss', 'content': 0.06954283267259598, 'timestamp': '2025-09-30 22:27:57.856402', 'step': 12989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:57.888901', 'step': 12989, 'epoch': 2} {'type': 'loss', 'content': 0.0405818447470665, 'timestamp': '2025-09-30 22:27:57.891791', 'step': 12990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.922993', 'step': 12990, 'epoch': 2} {'type': 'loss', 'content': 0.12899556756019592, 'timestamp': '2025-09-30 22:27:57.926023', 'step': 12991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:57.962745', 'step': 12991, 'epoch': 2} {'type': 'loss', 'content': 0.15797258913516998, 'timestamp': '2025-09-30 22:27:57.988436', 'step': 12992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:58.026555', 'step': 12992, 'epoch': 2} {'type': 'loss', 'content': 0.08955379575490952, 'timestamp': '2025-09-30 22:27:58.031637', 'step': 12993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:58.062980', 'step': 12993, 'epoch': 2} {'type': 'loss', 'content': 0.16128216683864594, 'timestamp': '2025-09-30 22:27:58.066585', 'step': 12994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:27:58.098865', 'step': 12994, 'epoch': 2} {'type': 'loss', 'content': 0.1307239979505539, 'timestamp': '2025-09-30 22:27:58.103370', 'step': 12995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:58.145323', 'step': 12995, 'epoch': 2} {'type': 'loss', 'content': 0.14048966765403748, 'timestamp': '2025-09-30 22:27:58.170408', 'step': 12996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:27:58.202395', 'step': 12996, 'epoch': 2} {'type': 'loss', 'content': 0.07149741798639297, 'timestamp': '2025-09-30 22:27:58.206173', 'step': 12997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:27:58.252507', 'step': 12997, 'epoch': 2} {'type': 'loss', 'content': 0.12577809393405914, 'timestamp': '2025-09-30 22:27:58.258479', 'step': 12998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:27:58.299991', 'step': 12998, 'epoch': 2} {'type': 'loss', 'content': 0.18719954788684845, 'timestamp': '2025-09-30 22:27:58.303231', 'step': 12999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:27:58.347638', 'step': 12999, 'epoch': 2} {'type': 'loss', 'content': 0.11950069665908813, 'timestamp': '2025-09-30 22:27:58.372486', 'step': 13000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13000', 'timestamp': '2025-09-30 22:28:03.512551', 'step': 13000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:03.545443', 'step': 13000, 'epoch': 2} {'type': 'loss', 'content': 0.0475936122238636, 'timestamp': '2025-09-30 22:28:03.548609', 'step': 13001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:03.582357', 'step': 13001, 'epoch': 2} {'type': 'loss', 'content': 0.10193570703268051, 'timestamp': '2025-09-30 22:28:03.586313', 'step': 13002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:03.620868', 'step': 13002, 'epoch': 2} {'type': 'loss', 'content': 0.10186134278774261, 'timestamp': '2025-09-30 22:28:03.623309', 'step': 13003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:03.653855', 'step': 13003, 'epoch': 2} {'type': 'loss', 'content': 0.134027361869812, 'timestamp': '2025-09-30 22:28:03.678527', 'step': 13004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:03.709577', 'step': 13004, 'epoch': 2} {'type': 'loss', 'content': 0.08752300590276718, 'timestamp': '2025-09-30 22:28:03.714469', 'step': 13005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:03.745498', 'step': 13005, 'epoch': 2} {'type': 'loss', 'content': 0.10921039432287216, 'timestamp': '2025-09-30 22:28:03.748140', 'step': 13006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:03.780987', 'step': 13006, 'epoch': 2} {'type': 'loss', 'content': 0.05130215734243393, 'timestamp': '2025-09-30 22:28:03.783709', 'step': 13007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:03.814871', 'step': 13007, 'epoch': 2} {'type': 'loss', 'content': 0.13593123853206635, 'timestamp': '2025-09-30 22:28:03.841374', 'step': 13008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:03.873334', 'step': 13008, 'epoch': 2} {'type': 'loss', 'content': 0.14307557046413422, 'timestamp': '2025-09-30 22:28:03.877103', 'step': 13009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:03.912418', 'step': 13009, 'epoch': 2} {'type': 'loss', 'content': 0.07037093490362167, 'timestamp': '2025-09-30 22:28:03.915243', 'step': 13010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:03.946834', 'step': 13010, 'epoch': 2} {'type': 'loss', 'content': 0.2080555111169815, 'timestamp': '2025-09-30 22:28:03.950367', 'step': 13011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:03.988812', 'step': 13011, 'epoch': 2} {'type': 'loss', 'content': 0.17985673248767853, 'timestamp': '2025-09-30 22:28:04.012605', 'step': 13012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.044411', 'step': 13012, 'epoch': 2} {'type': 'loss', 'content': 0.20946012437343597, 'timestamp': '2025-09-30 22:28:04.048304', 'step': 13013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:04.082175', 'step': 13013, 'epoch': 2} {'type': 'loss', 'content': 0.17336779832839966, 'timestamp': '2025-09-30 22:28:04.084296', 'step': 13014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.116199', 'step': 13014, 'epoch': 2} {'type': 'loss', 'content': 0.07339078933000565, 'timestamp': '2025-09-30 22:28:04.118713', 'step': 13015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.149214', 'step': 13015, 'epoch': 2} {'type': 'loss', 'content': 0.0799974575638771, 'timestamp': '2025-09-30 22:28:04.175682', 'step': 13016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.208018', 'step': 13016, 'epoch': 2} {'type': 'loss', 'content': 0.10176778584718704, 'timestamp': '2025-09-30 22:28:04.212420', 'step': 13017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:04.244078', 'step': 13017, 'epoch': 2} {'type': 'loss', 'content': 0.0904078260064125, 'timestamp': '2025-09-30 22:28:04.248485', 'step': 13018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.280191', 'step': 13018, 'epoch': 2} {'type': 'loss', 'content': 0.0906946063041687, 'timestamp': '2025-09-30 22:28:04.283218', 'step': 13019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.315116', 'step': 13019, 'epoch': 2} {'type': 'loss', 'content': 0.07237910479307175, 'timestamp': '2025-09-30 22:28:04.339630', 'step': 13020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.371522', 'step': 13020, 'epoch': 2} {'type': 'loss', 'content': 0.07614625990390778, 'timestamp': '2025-09-30 22:28:04.375509', 'step': 13021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:04.410634', 'step': 13021, 'epoch': 2} {'type': 'loss', 'content': 0.07159101217985153, 'timestamp': '2025-09-30 22:28:04.417039', 'step': 13022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:04.448099', 'step': 13022, 'epoch': 2} {'type': 'loss', 'content': 0.11537544429302216, 'timestamp': '2025-09-30 22:28:04.452540', 'step': 13023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:28:04.487153', 'step': 13023, 'epoch': 2} {'type': 'loss', 'content': 0.10525486618280411, 'timestamp': '2025-09-30 22:28:04.513790', 'step': 13024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:04.545373', 'step': 13024, 'epoch': 2} {'type': 'loss', 'content': 0.12354636192321777, 'timestamp': '2025-09-30 22:28:04.548890', 'step': 13025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.581610', 'step': 13025, 'epoch': 2} {'type': 'loss', 'content': 0.27188563346862793, 'timestamp': '2025-09-30 22:28:04.584760', 'step': 13026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.617339', 'step': 13026, 'epoch': 2} {'type': 'loss', 'content': 0.08680444955825806, 'timestamp': '2025-09-30 22:28:04.620170', 'step': 13027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:04.652585', 'step': 13027, 'epoch': 2} {'type': 'loss', 'content': 0.11317320913076401, 'timestamp': '2025-09-30 22:28:04.680307', 'step': 13028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.717464', 'step': 13028, 'epoch': 2} {'type': 'loss', 'content': 0.13594812154769897, 'timestamp': '2025-09-30 22:28:04.721126', 'step': 13029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.752844', 'step': 13029, 'epoch': 2} {'type': 'loss', 'content': 0.10894214361906052, 'timestamp': '2025-09-30 22:28:04.756858', 'step': 13030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.789762', 'step': 13030, 'epoch': 2} {'type': 'loss', 'content': 0.0726378932595253, 'timestamp': '2025-09-30 22:28:04.792830', 'step': 13031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:04.823663', 'step': 13031, 'epoch': 2} {'type': 'loss', 'content': 0.11249404400587082, 'timestamp': '2025-09-30 22:28:04.849212', 'step': 13032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:04.886420', 'step': 13032, 'epoch': 2} {'type': 'loss', 'content': 0.042547255754470825, 'timestamp': '2025-09-30 22:28:04.889391', 'step': 13033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:04.920869', 'step': 13033, 'epoch': 2} {'type': 'loss', 'content': 0.09225885570049286, 'timestamp': '2025-09-30 22:28:04.922994', 'step': 13034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:04.955012', 'step': 13034, 'epoch': 2} {'type': 'loss', 'content': 0.07980462908744812, 'timestamp': '2025-09-30 22:28:04.958936', 'step': 13035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:04.993039', 'step': 13035, 'epoch': 2} {'type': 'loss', 'content': 0.030197005718946457, 'timestamp': '2025-09-30 22:28:05.017035', 'step': 13036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:05.051906', 'step': 13036, 'epoch': 2} {'type': 'loss', 'content': 0.13347381353378296, 'timestamp': '2025-09-30 22:28:05.054706', 'step': 13037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:05.087873', 'step': 13037, 'epoch': 2} {'type': 'loss', 'content': 0.08177247643470764, 'timestamp': '2025-09-30 22:28:05.091413', 'step': 13038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:05.122088', 'step': 13038, 'epoch': 2} {'type': 'loss', 'content': 0.10304120928049088, 'timestamp': '2025-09-30 22:28:05.123934', 'step': 13039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:05.156092', 'step': 13039, 'epoch': 2} {'type': 'loss', 'content': 0.15081089735031128, 'timestamp': '2025-09-30 22:28:05.180560', 'step': 13040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:05.212307', 'step': 13040, 'epoch': 2} {'type': 'loss', 'content': 0.060057383030653, 'timestamp': '2025-09-30 22:28:05.214464', 'step': 13041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:05.244978', 'step': 13041, 'epoch': 2} {'type': 'loss', 'content': 0.09688562899827957, 'timestamp': '2025-09-30 22:28:05.254608', 'step': 13042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:05.286131', 'step': 13042, 'epoch': 2} {'type': 'loss', 'content': 0.07881591469049454, 'timestamp': '2025-09-30 22:28:05.288647', 'step': 13043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:05.329680', 'step': 13043, 'epoch': 2} {'type': 'loss', 'content': 0.11006127297878265, 'timestamp': '2025-09-30 22:28:05.353685', 'step': 13044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:05.383534', 'step': 13044, 'epoch': 2} {'type': 'loss', 'content': 0.17663782835006714, 'timestamp': '2025-09-30 22:28:05.387842', 'step': 13045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:05.419323', 'step': 13045, 'epoch': 2} {'type': 'loss', 'content': 0.027136297896504402, 'timestamp': '2025-09-30 22:28:05.421628', 'step': 13046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:05.452538', 'step': 13046, 'epoch': 2} {'type': 'loss', 'content': 0.05300896614789963, 'timestamp': '2025-09-30 22:28:05.454481', 'step': 13047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:05.485975', 'step': 13047, 'epoch': 2} {'type': 'loss', 'content': 0.07172948122024536, 'timestamp': '2025-09-30 22:28:05.509583', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:28:13.367533', 'step': 13048, 'epoch': 2} {'type': 'pplx', 'content': 8554.713442305318, 'timestamp': '2025-09-30 22:28:13.373665', 'step': 13048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:13.403439', 'step': 13048, 'epoch': 2} {'type': 'loss', 'content': 0.09181324392557144, 'timestamp': '2025-09-30 22:28:13.406486', 'step': 13049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:13.439033', 'step': 13049, 'epoch': 2} {'type': 'loss', 'content': 0.07937871664762497, 'timestamp': '2025-09-30 22:28:13.443615', 'step': 13050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:13.474670', 'step': 13050, 'epoch': 2} {'type': 'loss', 'content': 0.13380521535873413, 'timestamp': '2025-09-30 22:28:13.479072', 'step': 13051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:13.511467', 'step': 13051, 'epoch': 2} {'type': 'loss', 'content': 0.06481701135635376, 'timestamp': '2025-09-30 22:28:13.535810', 'step': 13052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:13.568610', 'step': 13052, 'epoch': 2} {'type': 'loss', 'content': 0.08904043585062027, 'timestamp': '2025-09-30 22:28:13.572901', 'step': 13053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:13.604172', 'step': 13053, 'epoch': 2} {'type': 'loss', 'content': 0.10032840073108673, 'timestamp': '2025-09-30 22:28:13.609875', 'step': 13054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:13.642343', 'step': 13054, 'epoch': 2} {'type': 'loss', 'content': 0.10465306043624878, 'timestamp': '2025-09-30 22:28:13.648098', 'step': 13055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:13.682840', 'step': 13055, 'epoch': 2} {'type': 'loss', 'content': 0.0807192325592041, 'timestamp': '2025-09-30 22:28:13.709848', 'step': 13056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:13.743875', 'step': 13056, 'epoch': 2} {'type': 'loss', 'content': 0.19864727556705475, 'timestamp': '2025-09-30 22:28:13.757938', 'step': 13057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:13.798623', 'step': 13057, 'epoch': 2} {'type': 'loss', 'content': 0.07719508558511734, 'timestamp': '2025-09-30 22:28:13.803390', 'step': 13058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:13.846747', 'step': 13058, 'epoch': 2} {'type': 'loss', 'content': 0.12113058567047119, 'timestamp': '2025-09-30 22:28:13.860049', 'step': 13059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:13.897924', 'step': 13059, 'epoch': 2} {'type': 'loss', 'content': 0.0798589438199997, 'timestamp': '2025-09-30 22:28:13.924070', 'step': 13060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:13.956867', 'step': 13060, 'epoch': 2} {'type': 'loss', 'content': 0.11668083816766739, 'timestamp': '2025-09-30 22:28:13.960133', 'step': 13061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:13.992165', 'step': 13061, 'epoch': 2} {'type': 'loss', 'content': 0.11255471408367157, 'timestamp': '2025-09-30 22:28:14.009341', 'step': 13062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.050353', 'step': 13062, 'epoch': 2} {'type': 'loss', 'content': 0.13422659039497375, 'timestamp': '2025-09-30 22:28:14.054089', 'step': 13063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.091178', 'step': 13063, 'epoch': 2} {'type': 'loss', 'content': 0.0917624905705452, 'timestamp': '2025-09-30 22:28:14.118014', 'step': 13064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.151811', 'step': 13064, 'epoch': 2} {'type': 'loss', 'content': 0.12222227454185486, 'timestamp': '2025-09-30 22:28:14.157047', 'step': 13065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:14.190241', 'step': 13065, 'epoch': 2} {'type': 'loss', 'content': 0.04238452389836311, 'timestamp': '2025-09-30 22:28:14.195487', 'step': 13066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:14.229323', 'step': 13066, 'epoch': 2} {'type': 'loss', 'content': 0.10551561415195465, 'timestamp': '2025-09-30 22:28:14.233481', 'step': 13067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:14.265521', 'step': 13067, 'epoch': 2} {'type': 'loss', 'content': 0.13248030841350555, 'timestamp': '2025-09-30 22:28:14.292173', 'step': 13068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.325653', 'step': 13068, 'epoch': 2} {'type': 'loss', 'content': 0.07000252604484558, 'timestamp': '2025-09-30 22:28:14.329394', 'step': 13069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:14.362754', 'step': 13069, 'epoch': 2} {'type': 'loss', 'content': 0.05178191512823105, 'timestamp': '2025-09-30 22:28:14.367295', 'step': 13070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.405755', 'step': 13070, 'epoch': 2} {'type': 'loss', 'content': 0.06104397773742676, 'timestamp': '2025-09-30 22:28:14.410052', 'step': 13071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:14.443671', 'step': 13071, 'epoch': 2} {'type': 'loss', 'content': 0.0953284204006195, 'timestamp': '2025-09-30 22:28:14.469736', 'step': 13072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:14.513666', 'step': 13072, 'epoch': 2} {'type': 'loss', 'content': 0.12255296856164932, 'timestamp': '2025-09-30 22:28:14.518986', 'step': 13073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.550134', 'step': 13073, 'epoch': 2} {'type': 'loss', 'content': 0.09576128423213959, 'timestamp': '2025-09-30 22:28:14.554717', 'step': 13074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.586984', 'step': 13074, 'epoch': 2} {'type': 'loss', 'content': 0.11664508283138275, 'timestamp': '2025-09-30 22:28:14.590826', 'step': 13075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:14.624053', 'step': 13075, 'epoch': 2} {'type': 'loss', 'content': 0.05340772494673729, 'timestamp': '2025-09-30 22:28:14.648853', 'step': 13076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.692948', 'step': 13076, 'epoch': 2} {'type': 'loss', 'content': 0.10401272028684616, 'timestamp': '2025-09-30 22:28:14.696571', 'step': 13077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.730571', 'step': 13077, 'epoch': 2} {'type': 'loss', 'content': 0.10918668657541275, 'timestamp': '2025-09-30 22:28:14.734277', 'step': 13078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:14.767008', 'step': 13078, 'epoch': 2} {'type': 'loss', 'content': 0.08150450885295868, 'timestamp': '2025-09-30 22:28:14.770385', 'step': 13079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:14.810906', 'step': 13079, 'epoch': 2} {'type': 'loss', 'content': 0.11260005086660385, 'timestamp': '2025-09-30 22:28:14.850491', 'step': 13080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:14.881628', 'step': 13080, 'epoch': 2} {'type': 'loss', 'content': 0.16547593474388123, 'timestamp': '2025-09-30 22:28:14.886158', 'step': 13081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:14.918866', 'step': 13081, 'epoch': 2} {'type': 'loss', 'content': 0.09133017063140869, 'timestamp': '2025-09-30 22:28:14.921591', 'step': 13082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:14.957622', 'step': 13082, 'epoch': 2} {'type': 'loss', 'content': 0.08494579046964645, 'timestamp': '2025-09-30 22:28:14.960799', 'step': 13083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:14.992220', 'step': 13083, 'epoch': 2} {'type': 'loss', 'content': 0.12644989788532257, 'timestamp': '2025-09-30 22:28:15.030505', 'step': 13084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.066987', 'step': 13084, 'epoch': 2} {'type': 'loss', 'content': 0.144307479262352, 'timestamp': '2025-09-30 22:28:15.070398', 'step': 13085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.103621', 'step': 13085, 'epoch': 2} {'type': 'loss', 'content': 0.1788610816001892, 'timestamp': '2025-09-30 22:28:15.107047', 'step': 13086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.139265', 'step': 13086, 'epoch': 2} {'type': 'loss', 'content': 0.21520833671092987, 'timestamp': '2025-09-30 22:28:15.147948', 'step': 13087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.187098', 'step': 13087, 'epoch': 2} {'type': 'loss', 'content': 0.1129743680357933, 'timestamp': '2025-09-30 22:28:15.213639', 'step': 13088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.244936', 'step': 13088, 'epoch': 2} {'type': 'loss', 'content': 0.10253297537565231, 'timestamp': '2025-09-30 22:28:15.249818', 'step': 13089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.289892', 'step': 13089, 'epoch': 2} {'type': 'loss', 'content': 0.1303350329399109, 'timestamp': '2025-09-30 22:28:15.293213', 'step': 13090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.324358', 'step': 13090, 'epoch': 2} {'type': 'loss', 'content': 0.12400828301906586, 'timestamp': '2025-09-30 22:28:15.331816', 'step': 13091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.364331', 'step': 13091, 'epoch': 2} {'type': 'loss', 'content': 0.1396196037530899, 'timestamp': '2025-09-30 22:28:15.388932', 'step': 13092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.420463', 'step': 13092, 'epoch': 2} {'type': 'loss', 'content': 0.12616297602653503, 'timestamp': '2025-09-30 22:28:15.425225', 'step': 13093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.460110', 'step': 13093, 'epoch': 2} {'type': 'loss', 'content': 0.07430620491504669, 'timestamp': '2025-09-30 22:28:15.463019', 'step': 13094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.499663', 'step': 13094, 'epoch': 2} {'type': 'loss', 'content': 0.16335013508796692, 'timestamp': '2025-09-30 22:28:15.502707', 'step': 13095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:15.535162', 'step': 13095, 'epoch': 2} {'type': 'loss', 'content': 0.12677349150180817, 'timestamp': '2025-09-30 22:28:15.562828', 'step': 13096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:15.597271', 'step': 13096, 'epoch': 2} {'type': 'loss', 'content': 0.0888148695230484, 'timestamp': '2025-09-30 22:28:15.600365', 'step': 13097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:15.632579', 'step': 13097, 'epoch': 2} {'type': 'loss', 'content': 0.08292374014854431, 'timestamp': '2025-09-30 22:28:15.636941', 'step': 13098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.669562', 'step': 13098, 'epoch': 2} {'type': 'loss', 'content': 0.13238297402858734, 'timestamp': '2025-09-30 22:28:15.674564', 'step': 13099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:15.711275', 'step': 13099, 'epoch': 2} {'type': 'loss', 'content': 0.10020274668931961, 'timestamp': '2025-09-30 22:28:15.738335', 'step': 13100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.770978', 'step': 13100, 'epoch': 2} {'type': 'loss', 'content': 0.09150992333889008, 'timestamp': '2025-09-30 22:28:15.773945', 'step': 13101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:15.805485', 'step': 13101, 'epoch': 2} {'type': 'loss', 'content': 0.21623414754867554, 'timestamp': '2025-09-30 22:28:15.810300', 'step': 13102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:15.842170', 'step': 13102, 'epoch': 2} {'type': 'loss', 'content': 0.14337414503097534, 'timestamp': '2025-09-30 22:28:15.844887', 'step': 13103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.877118', 'step': 13103, 'epoch': 2} {'type': 'loss', 'content': 0.14980384707450867, 'timestamp': '2025-09-30 22:28:15.902304', 'step': 13104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.934477', 'step': 13104, 'epoch': 2} {'type': 'loss', 'content': 0.10817500948905945, 'timestamp': '2025-09-30 22:28:15.938887', 'step': 13105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:15.972577', 'step': 13105, 'epoch': 2} {'type': 'loss', 'content': 0.12225714325904846, 'timestamp': '2025-09-30 22:28:15.975917', 'step': 13106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:16.009325', 'step': 13106, 'epoch': 2} {'type': 'loss', 'content': 0.05957847461104393, 'timestamp': '2025-09-30 22:28:16.011897', 'step': 13107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:16.045899', 'step': 13107, 'epoch': 2} {'type': 'loss', 'content': 0.15586665272712708, 'timestamp': '2025-09-30 22:28:16.072382', 'step': 13108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.106505', 'step': 13108, 'epoch': 2} {'type': 'loss', 'content': 0.0986391082406044, 'timestamp': '2025-09-30 22:28:16.110765', 'step': 13109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:16.143492', 'step': 13109, 'epoch': 2} {'type': 'loss', 'content': 0.08297301083803177, 'timestamp': '2025-09-30 22:28:16.147808', 'step': 13110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.179879', 'step': 13110, 'epoch': 2} {'type': 'loss', 'content': 0.07513631880283356, 'timestamp': '2025-09-30 22:28:16.190694', 'step': 13111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:16.225116', 'step': 13111, 'epoch': 2} {'type': 'loss', 'content': 0.020528636872768402, 'timestamp': '2025-09-30 22:28:16.253278', 'step': 13112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:16.286849', 'step': 13112, 'epoch': 2} {'type': 'loss', 'content': 0.0734170600771904, 'timestamp': '2025-09-30 22:28:16.290273', 'step': 13113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:16.322326', 'step': 13113, 'epoch': 2} {'type': 'loss', 'content': 0.1299477070569992, 'timestamp': '2025-09-30 22:28:16.341537', 'step': 13114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:16.372983', 'step': 13114, 'epoch': 2} {'type': 'loss', 'content': 0.11760739237070084, 'timestamp': '2025-09-30 22:28:16.376124', 'step': 13115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.406717', 'step': 13115, 'epoch': 2} {'type': 'loss', 'content': 0.15437516570091248, 'timestamp': '2025-09-30 22:28:16.431649', 'step': 13116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.462234', 'step': 13116, 'epoch': 2} {'type': 'loss', 'content': 0.08324465155601501, 'timestamp': '2025-09-30 22:28:16.465327', 'step': 13117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:16.496586', 'step': 13117, 'epoch': 2} {'type': 'loss', 'content': 0.1990559846162796, 'timestamp': '2025-09-30 22:28:16.501429', 'step': 13118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:16.533765', 'step': 13118, 'epoch': 2} {'type': 'loss', 'content': 0.06323596090078354, 'timestamp': '2025-09-30 22:28:16.536253', 'step': 13119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:16.573165', 'step': 13119, 'epoch': 2} {'type': 'loss', 'content': 0.08883402496576309, 'timestamp': '2025-09-30 22:28:16.599919', 'step': 13120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.632932', 'step': 13120, 'epoch': 2} {'type': 'loss', 'content': 0.08810359239578247, 'timestamp': '2025-09-30 22:28:16.638565', 'step': 13121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:16.682233', 'step': 13121, 'epoch': 2} {'type': 'loss', 'content': 0.13068483769893646, 'timestamp': '2025-09-30 22:28:16.688896', 'step': 13122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:16.723475', 'step': 13122, 'epoch': 2} {'type': 'loss', 'content': 0.0622640959918499, 'timestamp': '2025-09-30 22:28:16.728554', 'step': 13123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.761802', 'step': 13123, 'epoch': 2} {'type': 'loss', 'content': 0.0785132423043251, 'timestamp': '2025-09-30 22:28:16.788821', 'step': 13124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:16.823185', 'step': 13124, 'epoch': 2} {'type': 'loss', 'content': 0.06624707579612732, 'timestamp': '2025-09-30 22:28:16.827621', 'step': 13125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:16.858539', 'step': 13125, 'epoch': 2} {'type': 'loss', 'content': 0.033948346972465515, 'timestamp': '2025-09-30 22:28:16.871382', 'step': 13126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:16.902712', 'step': 13126, 'epoch': 2} {'type': 'loss', 'content': 0.05323013290762901, 'timestamp': '2025-09-30 22:28:16.906299', 'step': 13127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:16.938363', 'step': 13127, 'epoch': 2} {'type': 'loss', 'content': 0.09152267873287201, 'timestamp': '2025-09-30 22:28:16.975651', 'step': 13128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:17.013234', 'step': 13128, 'epoch': 2} {'type': 'loss', 'content': 0.051830802112817764, 'timestamp': '2025-09-30 22:28:17.020350', 'step': 13129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:17.054593', 'step': 13129, 'epoch': 2} {'type': 'loss', 'content': 0.0599902868270874, 'timestamp': '2025-09-30 22:28:17.057405', 'step': 13130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:17.088552', 'step': 13130, 'epoch': 2} {'type': 'loss', 'content': 0.11487472057342529, 'timestamp': '2025-09-30 22:28:17.098740', 'step': 13131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:17.134271', 'step': 13131, 'epoch': 2} {'type': 'loss', 'content': 0.12015482038259506, 'timestamp': '2025-09-30 22:28:17.160851', 'step': 13132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:17.193238', 'step': 13132, 'epoch': 2} {'type': 'loss', 'content': 0.16651380062103271, 'timestamp': '2025-09-30 22:28:17.196122', 'step': 13133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:17.230043', 'step': 13133, 'epoch': 2} {'type': 'loss', 'content': 0.07495144754648209, 'timestamp': '2025-09-30 22:28:17.234893', 'step': 13134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:17.267624', 'step': 13134, 'epoch': 2} {'type': 'loss', 'content': 0.11715979874134064, 'timestamp': '2025-09-30 22:28:17.270009', 'step': 13135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:17.304073', 'step': 13135, 'epoch': 2} {'type': 'loss', 'content': 0.13744670152664185, 'timestamp': '2025-09-30 22:28:17.331208', 'step': 13136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.363614', 'step': 13136, 'epoch': 2} {'type': 'loss', 'content': 0.09133747965097427, 'timestamp': '2025-09-30 22:28:17.375895', 'step': 13137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.408527', 'step': 13137, 'epoch': 2} {'type': 'loss', 'content': 0.07266956567764282, 'timestamp': '2025-09-30 22:28:17.414774', 'step': 13138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:17.448834', 'step': 13138, 'epoch': 2} {'type': 'loss', 'content': 0.05468212440609932, 'timestamp': '2025-09-30 22:28:17.453354', 'step': 13139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.486878', 'step': 13139, 'epoch': 2} {'type': 'loss', 'content': 0.09230483323335648, 'timestamp': '2025-09-30 22:28:17.512106', 'step': 13140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:17.546885', 'step': 13140, 'epoch': 2} {'type': 'loss', 'content': 0.10148099809885025, 'timestamp': '2025-09-30 22:28:17.550308', 'step': 13141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.581238', 'step': 13141, 'epoch': 2} {'type': 'loss', 'content': 0.1316072791814804, 'timestamp': '2025-09-30 22:28:17.588121', 'step': 13142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:17.620177', 'step': 13142, 'epoch': 2} {'type': 'loss', 'content': 0.10488983988761902, 'timestamp': '2025-09-30 22:28:17.623438', 'step': 13143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.654308', 'step': 13143, 'epoch': 2} {'type': 'loss', 'content': 0.19663506746292114, 'timestamp': '2025-09-30 22:28:17.680757', 'step': 13144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.732506', 'step': 13144, 'epoch': 2} {'type': 'loss', 'content': 0.0816684439778328, 'timestamp': '2025-09-30 22:28:17.737470', 'step': 13145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:17.769893', 'step': 13145, 'epoch': 2} {'type': 'loss', 'content': 0.11208701133728027, 'timestamp': '2025-09-30 22:28:17.773639', 'step': 13146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:17.804706', 'step': 13146, 'epoch': 2} {'type': 'loss', 'content': 0.09649534523487091, 'timestamp': '2025-09-30 22:28:17.808806', 'step': 13147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:17.844477', 'step': 13147, 'epoch': 2} {'type': 'loss', 'content': 0.11061106622219086, 'timestamp': '2025-09-30 22:28:17.870322', 'step': 13148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:17.902554', 'step': 13148, 'epoch': 2} {'type': 'loss', 'content': 0.10937613993883133, 'timestamp': '2025-09-30 22:28:17.905246', 'step': 13149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:17.936169', 'step': 13149, 'epoch': 2} {'type': 'loss', 'content': 0.14976972341537476, 'timestamp': '2025-09-30 22:28:17.954943', 'step': 13150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:17.989931', 'step': 13150, 'epoch': 2} {'type': 'loss', 'content': 0.11911282688379288, 'timestamp': '2025-09-30 22:28:17.994650', 'step': 13151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:18.027491', 'step': 13151, 'epoch': 2} {'type': 'loss', 'content': 0.0938853770494461, 'timestamp': '2025-09-30 22:28:18.060871', 'step': 13152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:18.093185', 'step': 13152, 'epoch': 2} {'type': 'loss', 'content': 0.062154147773981094, 'timestamp': '2025-09-30 22:28:18.095485', 'step': 13153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:18.129812', 'step': 13153, 'epoch': 2} {'type': 'loss', 'content': 0.1590188443660736, 'timestamp': '2025-09-30 22:28:18.132500', 'step': 13154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:18.165289', 'step': 13154, 'epoch': 2} {'type': 'loss', 'content': 0.12926281988620758, 'timestamp': '2025-09-30 22:28:18.171737', 'step': 13155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:18.203758', 'step': 13155, 'epoch': 2} {'type': 'loss', 'content': 0.17374730110168457, 'timestamp': '2025-09-30 22:28:18.231388', 'step': 13156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:18.262940', 'step': 13156, 'epoch': 2} {'type': 'loss', 'content': 0.07912302762269974, 'timestamp': '2025-09-30 22:28:18.286721', 'step': 13157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:18.318714', 'step': 13157, 'epoch': 2} {'type': 'loss', 'content': 0.1398136466741562, 'timestamp': '2025-09-30 22:28:18.323537', 'step': 13158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:18.355842', 'step': 13158, 'epoch': 2} {'type': 'loss', 'content': 0.09655195474624634, 'timestamp': '2025-09-30 22:28:18.359825', 'step': 13159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:18.391913', 'step': 13159, 'epoch': 2} {'type': 'loss', 'content': 0.03688614070415497, 'timestamp': '2025-09-30 22:28:18.416892', 'step': 13160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:18.449108', 'step': 13160, 'epoch': 2} {'type': 'loss', 'content': 0.12729795277118683, 'timestamp': '2025-09-30 22:28:18.455054', 'step': 13161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:18.491131', 'step': 13161, 'epoch': 2} {'type': 'loss', 'content': 0.06710722297430038, 'timestamp': '2025-09-30 22:28:18.496486', 'step': 13162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:18.534914', 'step': 13162, 'epoch': 2} {'type': 'loss', 'content': 0.12127459049224854, 'timestamp': '2025-09-30 22:28:18.537576', 'step': 13163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:18.569990', 'step': 13163, 'epoch': 2} {'type': 'loss', 'content': 0.06505372375249863, 'timestamp': '2025-09-30 22:28:18.594473', 'step': 13164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:18.627598', 'step': 13164, 'epoch': 2} {'type': 'loss', 'content': 0.11180641502141953, 'timestamp': '2025-09-30 22:28:18.632018', 'step': 13165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:18.665473', 'step': 13165, 'epoch': 2} {'type': 'loss', 'content': 0.08423299342393875, 'timestamp': '2025-09-30 22:28:18.684400', 'step': 13166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:18.723600', 'step': 13166, 'epoch': 2} {'type': 'loss', 'content': 0.08433225750923157, 'timestamp': '2025-09-30 22:28:18.728007', 'step': 13167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:18.760981', 'step': 13167, 'epoch': 2} {'type': 'loss', 'content': 0.09400420635938644, 'timestamp': '2025-09-30 22:28:18.796323', 'step': 13168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:18.846499', 'step': 13168, 'epoch': 2} {'type': 'loss', 'content': 0.11154798418283463, 'timestamp': '2025-09-30 22:28:18.858738', 'step': 13169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:18.897195', 'step': 13169, 'epoch': 2} {'type': 'loss', 'content': 0.08989285677671432, 'timestamp': '2025-09-30 22:28:18.905277', 'step': 13170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:18.940341', 'step': 13170, 'epoch': 2} {'type': 'loss', 'content': 0.10115896910429001, 'timestamp': '2025-09-30 22:28:18.950768', 'step': 13171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:18.984833', 'step': 13171, 'epoch': 2} {'type': 'loss', 'content': 0.04201703146100044, 'timestamp': '2025-09-30 22:28:19.008828', 'step': 13172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:19.056146', 'step': 13172, 'epoch': 2} {'type': 'loss', 'content': 0.05979771167039871, 'timestamp': '2025-09-30 22:28:19.063435', 'step': 13173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:19.100379', 'step': 13173, 'epoch': 2} {'type': 'loss', 'content': 0.10392399877309799, 'timestamp': '2025-09-30 22:28:19.102775', 'step': 13174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:19.143720', 'step': 13174, 'epoch': 2} {'type': 'loss', 'content': 0.07593860477209091, 'timestamp': '2025-09-30 22:28:19.148819', 'step': 13175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:19.203376', 'step': 13175, 'epoch': 2} {'type': 'loss', 'content': 0.05610937997698784, 'timestamp': '2025-09-30 22:28:19.231623', 'step': 13176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:19.268517', 'step': 13176, 'epoch': 2} {'type': 'loss', 'content': 0.1179719939827919, 'timestamp': '2025-09-30 22:28:19.271812', 'step': 13177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:19.305757', 'step': 13177, 'epoch': 2} {'type': 'loss', 'content': 0.08014708757400513, 'timestamp': '2025-09-30 22:28:19.315206', 'step': 13178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:19.349672', 'step': 13178, 'epoch': 2} {'type': 'loss', 'content': 0.07916760444641113, 'timestamp': '2025-09-30 22:28:19.357061', 'step': 13179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:19.435952', 'step': 13179, 'epoch': 2} {'type': 'loss', 'content': 0.18616501986980438, 'timestamp': '2025-09-30 22:28:19.460633', 'step': 13180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:19.493400', 'step': 13180, 'epoch': 2} {'type': 'loss', 'content': 0.07056642323732376, 'timestamp': '2025-09-30 22:28:19.499135', 'step': 13181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:19.540623', 'step': 13181, 'epoch': 2} {'type': 'loss', 'content': 0.10013911128044128, 'timestamp': '2025-09-30 22:28:19.566730', 'step': 13182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:19.601760', 'step': 13182, 'epoch': 2} {'type': 'loss', 'content': 0.11945879459381104, 'timestamp': '2025-09-30 22:28:19.623372', 'step': 13183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:19.654360', 'step': 13183, 'epoch': 2} {'type': 'loss', 'content': 0.06347189098596573, 'timestamp': '2025-09-30 22:28:19.692529', 'step': 13184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:19.725665', 'step': 13184, 'epoch': 2} {'type': 'loss', 'content': 0.07584656029939651, 'timestamp': '2025-09-30 22:28:19.728749', 'step': 13185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:19.769878', 'step': 13185, 'epoch': 2} {'type': 'loss', 'content': 0.1728823035955429, 'timestamp': '2025-09-30 22:28:19.785432', 'step': 13186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:19.819720', 'step': 13186, 'epoch': 2} {'type': 'loss', 'content': 0.08852788805961609, 'timestamp': '2025-09-30 22:28:19.823438', 'step': 13187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:19.874317', 'step': 13187, 'epoch': 2} {'type': 'loss', 'content': 0.06363089382648468, 'timestamp': '2025-09-30 22:28:19.899319', 'step': 13188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:19.935330', 'step': 13188, 'epoch': 2} {'type': 'loss', 'content': 0.08525332063436508, 'timestamp': '2025-09-30 22:28:19.940545', 'step': 13189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:19.973542', 'step': 13189, 'epoch': 2} {'type': 'loss', 'content': 0.13152047991752625, 'timestamp': '2025-09-30 22:28:19.988046', 'step': 13190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:20.027017', 'step': 13190, 'epoch': 2} {'type': 'loss', 'content': 0.07209297269582748, 'timestamp': '2025-09-30 22:28:20.033722', 'step': 13191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:20.067406', 'step': 13191, 'epoch': 2} {'type': 'loss', 'content': 0.10005602240562439, 'timestamp': '2025-09-30 22:28:20.093681', 'step': 13192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.124622', 'step': 13192, 'epoch': 2} {'type': 'loss', 'content': 0.07240971177816391, 'timestamp': '2025-09-30 22:28:20.136855', 'step': 13193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:20.168023', 'step': 13193, 'epoch': 2} {'type': 'loss', 'content': 0.08051496744155884, 'timestamp': '2025-09-30 22:28:20.171704', 'step': 13194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:20.206523', 'step': 13194, 'epoch': 2} {'type': 'loss', 'content': 0.061414480209350586, 'timestamp': '2025-09-30 22:28:20.211248', 'step': 13195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:20.249264', 'step': 13195, 'epoch': 2} {'type': 'loss', 'content': 0.1109912246465683, 'timestamp': '2025-09-30 22:28:20.293294', 'step': 13196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:20.325443', 'step': 13196, 'epoch': 2} {'type': 'loss', 'content': 0.11364266276359558, 'timestamp': '2025-09-30 22:28:20.328814', 'step': 13197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:20.359846', 'step': 13197, 'epoch': 2} {'type': 'loss', 'content': 0.11106843501329422, 'timestamp': '2025-09-30 22:28:20.363104', 'step': 13198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.397771', 'step': 13198, 'epoch': 2} {'type': 'loss', 'content': 0.08905446529388428, 'timestamp': '2025-09-30 22:28:20.418595', 'step': 13199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:20.467514', 'step': 13199, 'epoch': 2} {'type': 'loss', 'content': 0.09452120214700699, 'timestamp': '2025-09-30 22:28:20.495177', 'step': 13200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.543320', 'step': 13200, 'epoch': 2} {'type': 'loss', 'content': 0.07686959207057953, 'timestamp': '2025-09-30 22:28:20.548354', 'step': 13201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:20.582172', 'step': 13201, 'epoch': 2} {'type': 'loss', 'content': 0.08253811299800873, 'timestamp': '2025-09-30 22:28:20.588415', 'step': 13202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.622066', 'step': 13202, 'epoch': 2} {'type': 'loss', 'content': 0.09662356227636337, 'timestamp': '2025-09-30 22:28:20.625857', 'step': 13203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:20.660928', 'step': 13203, 'epoch': 2} {'type': 'loss', 'content': 0.10115788131952286, 'timestamp': '2025-09-30 22:28:20.686056', 'step': 13204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.718574', 'step': 13204, 'epoch': 2} {'type': 'loss', 'content': 0.08530162274837494, 'timestamp': '2025-09-30 22:28:20.722696', 'step': 13205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.753746', 'step': 13205, 'epoch': 2} {'type': 'loss', 'content': 0.1519494503736496, 'timestamp': '2025-09-30 22:28:20.758841', 'step': 13206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.794004', 'step': 13206, 'epoch': 2} {'type': 'loss', 'content': 0.08823537081480026, 'timestamp': '2025-09-30 22:28:20.803557', 'step': 13207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:20.837318', 'step': 13207, 'epoch': 2} {'type': 'loss', 'content': 0.08228026330471039, 'timestamp': '2025-09-30 22:28:20.874039', 'step': 13208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.905892', 'step': 13208, 'epoch': 2} {'type': 'loss', 'content': 0.14463575184345245, 'timestamp': '2025-09-30 22:28:20.910473', 'step': 13209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:20.957576', 'step': 13209, 'epoch': 2} {'type': 'loss', 'content': 0.1438361257314682, 'timestamp': '2025-09-30 22:28:20.969694', 'step': 13210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.006828', 'step': 13210, 'epoch': 2} {'type': 'loss', 'content': 0.11955318599939346, 'timestamp': '2025-09-30 22:28:21.011601', 'step': 13211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.044271', 'step': 13211, 'epoch': 2} {'type': 'loss', 'content': 0.053839270025491714, 'timestamp': '2025-09-30 22:28:21.070251', 'step': 13212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:21.102163', 'step': 13212, 'epoch': 2} {'type': 'loss', 'content': 0.04451563209295273, 'timestamp': '2025-09-30 22:28:21.106766', 'step': 13213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:21.138278', 'step': 13213, 'epoch': 2} {'type': 'loss', 'content': 0.08009589463472366, 'timestamp': '2025-09-30 22:28:21.143779', 'step': 13214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:21.177421', 'step': 13214, 'epoch': 2} {'type': 'loss', 'content': 0.08470901846885681, 'timestamp': '2025-09-30 22:28:21.180608', 'step': 13215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.210719', 'step': 13215, 'epoch': 2} {'type': 'loss', 'content': 0.07759783416986465, 'timestamp': '2025-09-30 22:28:21.237829', 'step': 13216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:21.270558', 'step': 13216, 'epoch': 2} {'type': 'loss', 'content': 0.09802575409412384, 'timestamp': '2025-09-30 22:28:21.274638', 'step': 13217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:21.306108', 'step': 13217, 'epoch': 2} {'type': 'loss', 'content': 0.1676478385925293, 'timestamp': '2025-09-30 22:28:21.309818', 'step': 13218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:21.341071', 'step': 13218, 'epoch': 2} {'type': 'loss', 'content': 0.10531169921159744, 'timestamp': '2025-09-30 22:28:21.344384', 'step': 13219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.381926', 'step': 13219, 'epoch': 2} {'type': 'loss', 'content': 0.07090923190116882, 'timestamp': '2025-09-30 22:28:21.406851', 'step': 13220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.439194', 'step': 13220, 'epoch': 2} {'type': 'loss', 'content': 0.10326701402664185, 'timestamp': '2025-09-30 22:28:21.442392', 'step': 13221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:21.473771', 'step': 13221, 'epoch': 2} {'type': 'loss', 'content': 0.11737598478794098, 'timestamp': '2025-09-30 22:28:21.478609', 'step': 13222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.511598', 'step': 13222, 'epoch': 2} {'type': 'loss', 'content': 0.0325225293636322, 'timestamp': '2025-09-30 22:28:21.515829', 'step': 13223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.555656', 'step': 13223, 'epoch': 2} {'type': 'loss', 'content': 0.07590554654598236, 'timestamp': '2025-09-30 22:28:21.596625', 'step': 13224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.643476', 'step': 13224, 'epoch': 2} {'type': 'loss', 'content': 0.209726482629776, 'timestamp': '2025-09-30 22:28:21.650341', 'step': 13225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:21.683592', 'step': 13225, 'epoch': 2} {'type': 'loss', 'content': 0.1368272602558136, 'timestamp': '2025-09-30 22:28:21.699624', 'step': 13226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:21.737963', 'step': 13226, 'epoch': 2} {'type': 'loss', 'content': 0.11822093278169632, 'timestamp': '2025-09-30 22:28:21.747544', 'step': 13227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:21.783191', 'step': 13227, 'epoch': 2} {'type': 'loss', 'content': 0.1392688900232315, 'timestamp': '2025-09-30 22:28:21.808433', 'step': 13228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:21.839547', 'step': 13228, 'epoch': 2} {'type': 'loss', 'content': 0.1343521773815155, 'timestamp': '2025-09-30 22:28:21.846358', 'step': 13229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:21.879068', 'step': 13229, 'epoch': 2} {'type': 'loss', 'content': 0.044789932668209076, 'timestamp': '2025-09-30 22:28:21.884337', 'step': 13230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:21.915489', 'step': 13230, 'epoch': 2} {'type': 'loss', 'content': 0.18196694552898407, 'timestamp': '2025-09-30 22:28:21.918699', 'step': 13231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:21.952020', 'step': 13231, 'epoch': 2} {'type': 'loss', 'content': 0.22738923132419586, 'timestamp': '2025-09-30 22:28:21.977097', 'step': 13232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.016048', 'step': 13232, 'epoch': 2} {'type': 'loss', 'content': 0.09740595519542694, 'timestamp': '2025-09-30 22:28:22.019257', 'step': 13233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:22.050320', 'step': 13233, 'epoch': 2} {'type': 'loss', 'content': 0.06036388501524925, 'timestamp': '2025-09-30 22:28:22.055015', 'step': 13234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:22.088213', 'step': 13234, 'epoch': 2} {'type': 'loss', 'content': 0.12117347866296768, 'timestamp': '2025-09-30 22:28:22.091988', 'step': 13235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:22.122762', 'step': 13235, 'epoch': 2} {'type': 'loss', 'content': 0.07090654224157333, 'timestamp': '2025-09-30 22:28:22.148652', 'step': 13236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:22.178308', 'step': 13236, 'epoch': 2} {'type': 'loss', 'content': 0.07461488246917725, 'timestamp': '2025-09-30 22:28:22.182684', 'step': 13237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:22.214309', 'step': 13237, 'epoch': 2} {'type': 'loss', 'content': 0.09802191704511642, 'timestamp': '2025-09-30 22:28:22.219534', 'step': 13238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:22.252184', 'step': 13238, 'epoch': 2} {'type': 'loss', 'content': 0.13160784542560577, 'timestamp': '2025-09-30 22:28:22.256817', 'step': 13239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:22.289012', 'step': 13239, 'epoch': 2} {'type': 'loss', 'content': 0.10107789933681488, 'timestamp': '2025-09-30 22:28:22.313356', 'step': 13240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:22.346783', 'step': 13240, 'epoch': 2} {'type': 'loss', 'content': 0.11170750856399536, 'timestamp': '2025-09-30 22:28:22.353332', 'step': 13241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.384885', 'step': 13241, 'epoch': 2} {'type': 'loss', 'content': 0.05518865957856178, 'timestamp': '2025-09-30 22:28:22.390106', 'step': 13242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.422421', 'step': 13242, 'epoch': 2} {'type': 'loss', 'content': 0.1317799836397171, 'timestamp': '2025-09-30 22:28:22.426140', 'step': 13243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:22.464769', 'step': 13243, 'epoch': 2} {'type': 'loss', 'content': 0.05353028327226639, 'timestamp': '2025-09-30 22:28:22.491587', 'step': 13244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:22.526162', 'step': 13244, 'epoch': 2} {'type': 'loss', 'content': 0.14035896956920624, 'timestamp': '2025-09-30 22:28:22.532260', 'step': 13245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:22.564612', 'step': 13245, 'epoch': 2} {'type': 'loss', 'content': 0.16261842846870422, 'timestamp': '2025-09-30 22:28:22.570235', 'step': 13246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.616647', 'step': 13246, 'epoch': 2} {'type': 'loss', 'content': 0.13714699447155, 'timestamp': '2025-09-30 22:28:22.622100', 'step': 13247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.672615', 'step': 13247, 'epoch': 2} {'type': 'loss', 'content': 0.044837094843387604, 'timestamp': '2025-09-30 22:28:22.700158', 'step': 13248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.734796', 'step': 13248, 'epoch': 2} {'type': 'loss', 'content': 0.09409645199775696, 'timestamp': '2025-09-30 22:28:22.739631', 'step': 13249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.772784', 'step': 13249, 'epoch': 2} {'type': 'loss', 'content': 0.14070644974708557, 'timestamp': '2025-09-30 22:28:22.786343', 'step': 13250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.817374', 'step': 13250, 'epoch': 2} {'type': 'loss', 'content': 0.0861215591430664, 'timestamp': '2025-09-30 22:28:22.821063', 'step': 13251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.856777', 'step': 13251, 'epoch': 2} {'type': 'loss', 'content': 0.067962646484375, 'timestamp': '2025-09-30 22:28:22.890637', 'step': 13252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:22.923658', 'step': 13252, 'epoch': 2} {'type': 'loss', 'content': 0.06535228341817856, 'timestamp': '2025-09-30 22:28:22.928564', 'step': 13253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:22.972588', 'step': 13253, 'epoch': 2} {'type': 'loss', 'content': 0.080662801861763, 'timestamp': '2025-09-30 22:28:22.991701', 'step': 13254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.031321', 'step': 13254, 'epoch': 2} {'type': 'loss', 'content': 0.08405401557683945, 'timestamp': '2025-09-30 22:28:23.042504', 'step': 13255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:23.075124', 'step': 13255, 'epoch': 2} {'type': 'loss', 'content': 0.15032070875167847, 'timestamp': '2025-09-30 22:28:23.109392', 'step': 13256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.155891', 'step': 13256, 'epoch': 2} {'type': 'loss', 'content': 0.09748220443725586, 'timestamp': '2025-09-30 22:28:23.160081', 'step': 13257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:23.204312', 'step': 13257, 'epoch': 2} {'type': 'loss', 'content': 0.02733641117811203, 'timestamp': '2025-09-30 22:28:23.209362', 'step': 13258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.240987', 'step': 13258, 'epoch': 2} {'type': 'loss', 'content': 0.06449282169342041, 'timestamp': '2025-09-30 22:28:23.244246', 'step': 13259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:23.275791', 'step': 13259, 'epoch': 2} {'type': 'loss', 'content': 0.06045810505747795, 'timestamp': '2025-09-30 22:28:23.302850', 'step': 13260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.336335', 'step': 13260, 'epoch': 2} {'type': 'loss', 'content': 0.21653315424919128, 'timestamp': '2025-09-30 22:28:23.341030', 'step': 13261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:23.373391', 'step': 13261, 'epoch': 2} {'type': 'loss', 'content': 0.14926475286483765, 'timestamp': '2025-09-30 22:28:23.377563', 'step': 13262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:23.408695', 'step': 13262, 'epoch': 2} {'type': 'loss', 'content': 0.008722163736820221, 'timestamp': '2025-09-30 22:28:23.428544', 'step': 13263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.460310', 'step': 13263, 'epoch': 2} {'type': 'loss', 'content': 0.06414081901311874, 'timestamp': '2025-09-30 22:28:23.487186', 'step': 13264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.524307', 'step': 13264, 'epoch': 2} {'type': 'loss', 'content': 0.07182741910219193, 'timestamp': '2025-09-30 22:28:23.529634', 'step': 13265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:23.563707', 'step': 13265, 'epoch': 2} {'type': 'loss', 'content': 0.13368061184883118, 'timestamp': '2025-09-30 22:28:23.579439', 'step': 13266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:23.615535', 'step': 13266, 'epoch': 2} {'type': 'loss', 'content': 0.08771559596061707, 'timestamp': '2025-09-30 22:28:23.619690', 'step': 13267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:23.653187', 'step': 13267, 'epoch': 2} {'type': 'loss', 'content': 0.1237797737121582, 'timestamp': '2025-09-30 22:28:23.678621', 'step': 13268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:23.710094', 'step': 13268, 'epoch': 2} {'type': 'loss', 'content': 0.08338384330272675, 'timestamp': '2025-09-30 22:28:23.715218', 'step': 13269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.747643', 'step': 13269, 'epoch': 2} {'type': 'loss', 'content': 0.07062367349863052, 'timestamp': '2025-09-30 22:28:23.751351', 'step': 13270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.784175', 'step': 13270, 'epoch': 2} {'type': 'loss', 'content': 0.0397280789911747, 'timestamp': '2025-09-30 22:28:23.801115', 'step': 13271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:23.842999', 'step': 13271, 'epoch': 2} {'type': 'loss', 'content': 0.06652235239744186, 'timestamp': '2025-09-30 22:28:23.883277', 'step': 13272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:23.914643', 'step': 13272, 'epoch': 2} {'type': 'loss', 'content': 0.13483186066150665, 'timestamp': '2025-09-30 22:28:23.918597', 'step': 13273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:23.957938', 'step': 13273, 'epoch': 2} {'type': 'loss', 'content': 0.10003675520420074, 'timestamp': '2025-09-30 22:28:23.960742', 'step': 13274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:23.992952', 'step': 13274, 'epoch': 2} {'type': 'loss', 'content': 0.14995914697647095, 'timestamp': '2025-09-30 22:28:23.997185', 'step': 13275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:24.027987', 'step': 13275, 'epoch': 2} {'type': 'loss', 'content': 0.09676732122898102, 'timestamp': '2025-09-30 22:28:24.053236', 'step': 13276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.085267', 'step': 13276, 'epoch': 2} {'type': 'loss', 'content': 0.07354293018579483, 'timestamp': '2025-09-30 22:28:24.100346', 'step': 13277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:24.151771', 'step': 13277, 'epoch': 2} {'type': 'loss', 'content': 0.130684033036232, 'timestamp': '2025-09-30 22:28:24.157458', 'step': 13278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:24.190710', 'step': 13278, 'epoch': 2} {'type': 'loss', 'content': 0.02122807689011097, 'timestamp': '2025-09-30 22:28:24.195689', 'step': 13279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:24.243167', 'step': 13279, 'epoch': 2} {'type': 'loss', 'content': 0.1482061892747879, 'timestamp': '2025-09-30 22:28:24.269287', 'step': 13280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.309414', 'step': 13280, 'epoch': 2} {'type': 'loss', 'content': 0.08652376383543015, 'timestamp': '2025-09-30 22:28:24.323352', 'step': 13281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:24.358765', 'step': 13281, 'epoch': 2} {'type': 'loss', 'content': 0.15922178328037262, 'timestamp': '2025-09-30 22:28:24.375134', 'step': 13282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:24.412593', 'step': 13282, 'epoch': 2} {'type': 'loss', 'content': 0.08443784713745117, 'timestamp': '2025-09-30 22:28:24.416396', 'step': 13283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.447548', 'step': 13283, 'epoch': 2} {'type': 'loss', 'content': 0.09192875027656555, 'timestamp': '2025-09-30 22:28:24.473853', 'step': 13284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:24.515823', 'step': 13284, 'epoch': 2} {'type': 'loss', 'content': 0.1019989624619484, 'timestamp': '2025-09-30 22:28:24.520560', 'step': 13285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:24.553166', 'step': 13285, 'epoch': 2} {'type': 'loss', 'content': 0.15739242732524872, 'timestamp': '2025-09-30 22:28:24.570302', 'step': 13286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:24.603541', 'step': 13286, 'epoch': 2} {'type': 'loss', 'content': 0.10058999806642532, 'timestamp': '2025-09-30 22:28:24.618576', 'step': 13287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.660628', 'step': 13287, 'epoch': 2} {'type': 'loss', 'content': 0.03093639947474003, 'timestamp': '2025-09-30 22:28:24.695841', 'step': 13288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:24.726472', 'step': 13288, 'epoch': 2} {'type': 'loss', 'content': 0.10359872877597809, 'timestamp': '2025-09-30 22:28:24.730685', 'step': 13289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.776329', 'step': 13289, 'epoch': 2} {'type': 'loss', 'content': 0.11265730112791061, 'timestamp': '2025-09-30 22:28:24.780020', 'step': 13290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.821708', 'step': 13290, 'epoch': 2} {'type': 'loss', 'content': 0.1404838114976883, 'timestamp': '2025-09-30 22:28:24.825394', 'step': 13291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:24.857802', 'step': 13291, 'epoch': 2} {'type': 'loss', 'content': 0.07851620763540268, 'timestamp': '2025-09-30 22:28:24.885978', 'step': 13292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:28:24.919480', 'step': 13292, 'epoch': 2} {'type': 'loss', 'content': 0.14829500019550323, 'timestamp': '2025-09-30 22:28:24.924787', 'step': 13293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.957663', 'step': 13293, 'epoch': 2} {'type': 'loss', 'content': 0.07945762574672699, 'timestamp': '2025-09-30 22:28:24.962838', 'step': 13294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:24.995977', 'step': 13294, 'epoch': 2} {'type': 'loss', 'content': 0.05585019290447235, 'timestamp': '2025-09-30 22:28:25.000799', 'step': 13295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:25.031305', 'step': 13295, 'epoch': 2} {'type': 'loss', 'content': 0.1530149132013321, 'timestamp': '2025-09-30 22:28:25.057868', 'step': 13296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.089839', 'step': 13296, 'epoch': 2} {'type': 'loss', 'content': 0.111333929002285, 'timestamp': '2025-09-30 22:28:25.094750', 'step': 13297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.127297', 'step': 13297, 'epoch': 2} {'type': 'loss', 'content': 0.10224775969982147, 'timestamp': '2025-09-30 22:28:25.131875', 'step': 13298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.164780', 'step': 13298, 'epoch': 2} {'type': 'loss', 'content': 0.08581288903951645, 'timestamp': '2025-09-30 22:28:25.169504', 'step': 13299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:25.218414', 'step': 13299, 'epoch': 2} {'type': 'loss', 'content': 0.10996638238430023, 'timestamp': '2025-09-30 22:28:25.244513', 'step': 13300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:25.288439', 'step': 13300, 'epoch': 2} {'type': 'loss', 'content': 0.09132112562656403, 'timestamp': '2025-09-30 22:28:25.294348', 'step': 13301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.325934', 'step': 13301, 'epoch': 2} {'type': 'loss', 'content': 0.1407708078622818, 'timestamp': '2025-09-30 22:28:25.329246', 'step': 13302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:25.367402', 'step': 13302, 'epoch': 2} {'type': 'loss', 'content': 0.13018926978111267, 'timestamp': '2025-09-30 22:28:25.372007', 'step': 13303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.404734', 'step': 13303, 'epoch': 2} {'type': 'loss', 'content': 0.08719181269407272, 'timestamp': '2025-09-30 22:28:25.440676', 'step': 13304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:25.473900', 'step': 13304, 'epoch': 2} {'type': 'loss', 'content': 0.11824264377355576, 'timestamp': '2025-09-30 22:28:25.478557', 'step': 13305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:25.510041', 'step': 13305, 'epoch': 2} {'type': 'loss', 'content': 0.18680202960968018, 'timestamp': '2025-09-30 22:28:25.515893', 'step': 13306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:25.548472', 'step': 13306, 'epoch': 2} {'type': 'loss', 'content': 0.14963190257549286, 'timestamp': '2025-09-30 22:28:25.562554', 'step': 13307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:25.597192', 'step': 13307, 'epoch': 2} {'type': 'loss', 'content': 0.10737315565347672, 'timestamp': '2025-09-30 22:28:25.622820', 'step': 13308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:25.655501', 'step': 13308, 'epoch': 2} {'type': 'loss', 'content': 0.1301954984664917, 'timestamp': '2025-09-30 22:28:25.660249', 'step': 13309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.691811', 'step': 13309, 'epoch': 2} {'type': 'loss', 'content': 0.18792253732681274, 'timestamp': '2025-09-30 22:28:25.696451', 'step': 13310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:25.739779', 'step': 13310, 'epoch': 2} {'type': 'loss', 'content': 0.12079725414514542, 'timestamp': '2025-09-30 22:28:25.746044', 'step': 13311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:25.778760', 'step': 13311, 'epoch': 2} {'type': 'loss', 'content': 0.11125822365283966, 'timestamp': '2025-09-30 22:28:25.803949', 'step': 13312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:25.834503', 'step': 13312, 'epoch': 2} {'type': 'loss', 'content': 0.12936942279338837, 'timestamp': '2025-09-30 22:28:25.838452', 'step': 13313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:25.874467', 'step': 13313, 'epoch': 2} {'type': 'loss', 'content': 0.09169750660657883, 'timestamp': '2025-09-30 22:28:25.889336', 'step': 13314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:25.931876', 'step': 13314, 'epoch': 2} {'type': 'loss', 'content': 0.13967669010162354, 'timestamp': '2025-09-30 22:28:25.936352', 'step': 13315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:25.976265', 'step': 13315, 'epoch': 2} {'type': 'loss', 'content': 0.04167495295405388, 'timestamp': '2025-09-30 22:28:26.002562', 'step': 13316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:26.034943', 'step': 13316, 'epoch': 2} {'type': 'loss', 'content': 0.1220482736825943, 'timestamp': '2025-09-30 22:28:26.049940', 'step': 13317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.092528', 'step': 13317, 'epoch': 2} {'type': 'loss', 'content': 0.10433471202850342, 'timestamp': '2025-09-30 22:28:26.098064', 'step': 13318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.140950', 'step': 13318, 'epoch': 2} {'type': 'loss', 'content': 0.13820362091064453, 'timestamp': '2025-09-30 22:28:26.147049', 'step': 13319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:26.183678', 'step': 13319, 'epoch': 2} {'type': 'loss', 'content': 0.17660799622535706, 'timestamp': '2025-09-30 22:28:26.208960', 'step': 13320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.242695', 'step': 13320, 'epoch': 2} {'type': 'loss', 'content': 0.10454603284597397, 'timestamp': '2025-09-30 22:28:26.247461', 'step': 13321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:26.281155', 'step': 13321, 'epoch': 2} {'type': 'loss', 'content': 0.11018872261047363, 'timestamp': '2025-09-30 22:28:26.284978', 'step': 13322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:26.324641', 'step': 13322, 'epoch': 2} {'type': 'loss', 'content': 0.08315099030733109, 'timestamp': '2025-09-30 22:28:26.343892', 'step': 13323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:26.386650', 'step': 13323, 'epoch': 2} {'type': 'loss', 'content': 0.05524938926100731, 'timestamp': '2025-09-30 22:28:26.412852', 'step': 13324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.444693', 'step': 13324, 'epoch': 2} {'type': 'loss', 'content': 0.1256953477859497, 'timestamp': '2025-09-30 22:28:26.448568', 'step': 13325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:26.479897', 'step': 13325, 'epoch': 2} {'type': 'loss', 'content': 0.09711616486310959, 'timestamp': '2025-09-30 22:28:26.484232', 'step': 13326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:26.516819', 'step': 13326, 'epoch': 2} {'type': 'loss', 'content': 0.08578427881002426, 'timestamp': '2025-09-30 22:28:26.523043', 'step': 13327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:26.555779', 'step': 13327, 'epoch': 2} {'type': 'loss', 'content': 0.13932251930236816, 'timestamp': '2025-09-30 22:28:26.581453', 'step': 13328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:26.616752', 'step': 13328, 'epoch': 2} {'type': 'loss', 'content': 0.08422505110502243, 'timestamp': '2025-09-30 22:28:26.622002', 'step': 13329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:26.655211', 'step': 13329, 'epoch': 2} {'type': 'loss', 'content': 0.15916015207767487, 'timestamp': '2025-09-30 22:28:26.677355', 'step': 13330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.708751', 'step': 13330, 'epoch': 2} {'type': 'loss', 'content': 0.08474328368902206, 'timestamp': '2025-09-30 22:28:26.711996', 'step': 13331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:26.743494', 'step': 13331, 'epoch': 2} {'type': 'loss', 'content': 0.07516629993915558, 'timestamp': '2025-09-30 22:28:26.768505', 'step': 13332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.799113', 'step': 13332, 'epoch': 2} {'type': 'loss', 'content': 0.12244391441345215, 'timestamp': '2025-09-30 22:28:26.802894', 'step': 13333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:26.845301', 'step': 13333, 'epoch': 2} {'type': 'loss', 'content': 0.03589063882827759, 'timestamp': '2025-09-30 22:28:26.849931', 'step': 13334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:26.896118', 'step': 13334, 'epoch': 2} {'type': 'loss', 'content': 0.11193801462650299, 'timestamp': '2025-09-30 22:28:26.901372', 'step': 13335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.935550', 'step': 13335, 'epoch': 2} {'type': 'loss', 'content': 0.08960384875535965, 'timestamp': '2025-09-30 22:28:26.960467', 'step': 13336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:26.991054', 'step': 13336, 'epoch': 2} {'type': 'loss', 'content': 0.06067337468266487, 'timestamp': '2025-09-30 22:28:26.996720', 'step': 13337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.040776', 'step': 13337, 'epoch': 2} {'type': 'loss', 'content': 0.06562919169664383, 'timestamp': '2025-09-30 22:28:27.044916', 'step': 13338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.076593', 'step': 13338, 'epoch': 2} {'type': 'loss', 'content': 0.11598518490791321, 'timestamp': '2025-09-30 22:28:27.080302', 'step': 13339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:27.121483', 'step': 13339, 'epoch': 2} {'type': 'loss', 'content': 0.10313750803470612, 'timestamp': '2025-09-30 22:28:27.149560', 'step': 13340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:27.181287', 'step': 13340, 'epoch': 2} {'type': 'loss', 'content': 0.16408348083496094, 'timestamp': '2025-09-30 22:28:27.186543', 'step': 13341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.219358', 'step': 13341, 'epoch': 2} {'type': 'loss', 'content': 0.10533494502305984, 'timestamp': '2025-09-30 22:28:27.222593', 'step': 13342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:27.253165', 'step': 13342, 'epoch': 2} {'type': 'loss', 'content': 0.11897613853216171, 'timestamp': '2025-09-30 22:28:27.258738', 'step': 13343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:27.293562', 'step': 13343, 'epoch': 2} {'type': 'loss', 'content': 0.10994892567396164, 'timestamp': '2025-09-30 22:28:27.320381', 'step': 13344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:27.363706', 'step': 13344, 'epoch': 2} {'type': 'loss', 'content': 0.04967675358057022, 'timestamp': '2025-09-30 22:28:27.368299', 'step': 13345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:27.399791', 'step': 13345, 'epoch': 2} {'type': 'loss', 'content': 0.15714868903160095, 'timestamp': '2025-09-30 22:28:27.403048', 'step': 13346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:27.434624', 'step': 13346, 'epoch': 2} {'type': 'loss', 'content': 0.07604332268238068, 'timestamp': '2025-09-30 22:28:27.442926', 'step': 13347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:27.473894', 'step': 13347, 'epoch': 2} {'type': 'loss', 'content': 0.09501879662275314, 'timestamp': '2025-09-30 22:28:27.500594', 'step': 13348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:27.533267', 'step': 13348, 'epoch': 2} {'type': 'loss', 'content': 0.21787914633750916, 'timestamp': '2025-09-30 22:28:27.536581', 'step': 13349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.570975', 'step': 13349, 'epoch': 2} {'type': 'loss', 'content': 0.05484839156270027, 'timestamp': '2025-09-30 22:28:27.575610', 'step': 13350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:27.610164', 'step': 13350, 'epoch': 2} {'type': 'loss', 'content': 0.13486900925636292, 'timestamp': '2025-09-30 22:28:27.614106', 'step': 13351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.646515', 'step': 13351, 'epoch': 2} {'type': 'loss', 'content': 0.06116057187318802, 'timestamp': '2025-09-30 22:28:27.673128', 'step': 13352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.704527', 'step': 13352, 'epoch': 2} {'type': 'loss', 'content': 0.085897296667099, 'timestamp': '2025-09-30 22:28:27.707535', 'step': 13353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.738089', 'step': 13353, 'epoch': 2} {'type': 'loss', 'content': 0.05847272649407387, 'timestamp': '2025-09-30 22:28:27.741835', 'step': 13354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.774277', 'step': 13354, 'epoch': 2} {'type': 'loss', 'content': 0.16571514308452606, 'timestamp': '2025-09-30 22:28:27.779409', 'step': 13355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:27.813218', 'step': 13355, 'epoch': 2} {'type': 'loss', 'content': 0.11316508799791336, 'timestamp': '2025-09-30 22:28:27.840004', 'step': 13356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:27.873218', 'step': 13356, 'epoch': 2} {'type': 'loss', 'content': 0.14292168617248535, 'timestamp': '2025-09-30 22:28:27.877720', 'step': 13357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:27.912373', 'step': 13357, 'epoch': 2} {'type': 'loss', 'content': 0.09821267426013947, 'timestamp': '2025-09-30 22:28:27.917724', 'step': 13358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:27.963344', 'step': 13358, 'epoch': 2} {'type': 'loss', 'content': 0.028068456798791885, 'timestamp': '2025-09-30 22:28:27.966829', 'step': 13359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.019012', 'step': 13359, 'epoch': 2} {'type': 'loss', 'content': 0.15357862412929535, 'timestamp': '2025-09-30 22:28:28.045316', 'step': 13360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.077939', 'step': 13360, 'epoch': 2} {'type': 'loss', 'content': 0.11201990395784378, 'timestamp': '2025-09-30 22:28:28.081806', 'step': 13361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.117454', 'step': 13361, 'epoch': 2} {'type': 'loss', 'content': 0.0525752492249012, 'timestamp': '2025-09-30 22:28:28.121851', 'step': 13362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.154773', 'step': 13362, 'epoch': 2} {'type': 'loss', 'content': 0.06746365129947662, 'timestamp': '2025-09-30 22:28:28.158147', 'step': 13363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:28.191165', 'step': 13363, 'epoch': 2} {'type': 'loss', 'content': 0.15452681481838226, 'timestamp': '2025-09-30 22:28:28.216189', 'step': 13364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:28.267530', 'step': 13364, 'epoch': 2} {'type': 'loss', 'content': 0.13012568652629852, 'timestamp': '2025-09-30 22:28:28.271494', 'step': 13365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.303836', 'step': 13365, 'epoch': 2} {'type': 'loss', 'content': 0.11785159260034561, 'timestamp': '2025-09-30 22:28:28.307630', 'step': 13366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.340046', 'step': 13366, 'epoch': 2} {'type': 'loss', 'content': 0.1121741309762001, 'timestamp': '2025-09-30 22:28:28.358612', 'step': 13367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.402174', 'step': 13367, 'epoch': 2} {'type': 'loss', 'content': 0.1463315784931183, 'timestamp': '2025-09-30 22:28:28.428031', 'step': 13368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.462482', 'step': 13368, 'epoch': 2} {'type': 'loss', 'content': 0.09672988951206207, 'timestamp': '2025-09-30 22:28:28.469643', 'step': 13369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:28.502315', 'step': 13369, 'epoch': 2} {'type': 'loss', 'content': 0.0963425561785698, 'timestamp': '2025-09-30 22:28:28.507392', 'step': 13370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.552586', 'step': 13370, 'epoch': 2} {'type': 'loss', 'content': 0.11296895146369934, 'timestamp': '2025-09-30 22:28:28.556655', 'step': 13371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.588966', 'step': 13371, 'epoch': 2} {'type': 'loss', 'content': 0.10759782791137695, 'timestamp': '2025-09-30 22:28:28.614813', 'step': 13372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:28.647481', 'step': 13372, 'epoch': 2} {'type': 'loss', 'content': 0.0911082774400711, 'timestamp': '2025-09-30 22:28:28.664484', 'step': 13373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.696632', 'step': 13373, 'epoch': 2} {'type': 'loss', 'content': 0.243540421128273, 'timestamp': '2025-09-30 22:28:28.708688', 'step': 13374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:28.749983', 'step': 13374, 'epoch': 2} {'type': 'loss', 'content': 0.11639340966939926, 'timestamp': '2025-09-30 22:28:28.760671', 'step': 13375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.793626', 'step': 13375, 'epoch': 2} {'type': 'loss', 'content': 0.08671486377716064, 'timestamp': '2025-09-30 22:28:28.818748', 'step': 13376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:28.851662', 'step': 13376, 'epoch': 2} {'type': 'loss', 'content': 0.09647610783576965, 'timestamp': '2025-09-30 22:28:28.862580', 'step': 13377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:28.907474', 'step': 13377, 'epoch': 2} {'type': 'loss', 'content': 0.07518228143453598, 'timestamp': '2025-09-30 22:28:28.911322', 'step': 13378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.944807', 'step': 13378, 'epoch': 2} {'type': 'loss', 'content': 0.15317219495773315, 'timestamp': '2025-09-30 22:28:28.948906', 'step': 13379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:28.990863', 'step': 13379, 'epoch': 2} {'type': 'loss', 'content': 0.09382189065217972, 'timestamp': '2025-09-30 22:28:29.016944', 'step': 13380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.047914', 'step': 13380, 'epoch': 2} {'type': 'loss', 'content': 0.10283936560153961, 'timestamp': '2025-09-30 22:28:29.051915', 'step': 13381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:29.084232', 'step': 13381, 'epoch': 2} {'type': 'loss', 'content': 0.13355836272239685, 'timestamp': '2025-09-30 22:28:29.087339', 'step': 13382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.120673', 'step': 13382, 'epoch': 2} {'type': 'loss', 'content': 0.1698990762233734, 'timestamp': '2025-09-30 22:28:29.125394', 'step': 13383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:29.156144', 'step': 13383, 'epoch': 2} {'type': 'loss', 'content': 0.10975590348243713, 'timestamp': '2025-09-30 22:28:29.181331', 'step': 13384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:29.213265', 'step': 13384, 'epoch': 2} {'type': 'loss', 'content': 0.052534330636262894, 'timestamp': '2025-09-30 22:28:29.218288', 'step': 13385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:29.264090', 'step': 13385, 'epoch': 2} {'type': 'loss', 'content': 0.11049214005470276, 'timestamp': '2025-09-30 22:28:29.268603', 'step': 13386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.302542', 'step': 13386, 'epoch': 2} {'type': 'loss', 'content': 0.04712280258536339, 'timestamp': '2025-09-30 22:28:29.316402', 'step': 13387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.357022', 'step': 13387, 'epoch': 2} {'type': 'loss', 'content': 0.0474458672106266, 'timestamp': '2025-09-30 22:28:29.383983', 'step': 13388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.427326', 'step': 13388, 'epoch': 2} {'type': 'loss', 'content': 0.07560374587774277, 'timestamp': '2025-09-30 22:28:29.431980', 'step': 13389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.479169', 'step': 13389, 'epoch': 2} {'type': 'loss', 'content': 0.03695974126458168, 'timestamp': '2025-09-30 22:28:29.484642', 'step': 13390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.531096', 'step': 13390, 'epoch': 2} {'type': 'loss', 'content': 0.10256887227296829, 'timestamp': '2025-09-30 22:28:29.535576', 'step': 13391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.568208', 'step': 13391, 'epoch': 2} {'type': 'loss', 'content': 0.11767364293336868, 'timestamp': '2025-09-30 22:28:29.600800', 'step': 13392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:29.631770', 'step': 13392, 'epoch': 2} {'type': 'loss', 'content': 0.1956159919500351, 'timestamp': '2025-09-30 22:28:29.636547', 'step': 13393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.668182', 'step': 13393, 'epoch': 2} {'type': 'loss', 'content': 0.10384803265333176, 'timestamp': '2025-09-30 22:28:29.671339', 'step': 13394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:29.703024', 'step': 13394, 'epoch': 2} {'type': 'loss', 'content': 0.10901939868927002, 'timestamp': '2025-09-30 22:28:29.708070', 'step': 13395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:29.749044', 'step': 13395, 'epoch': 2} {'type': 'loss', 'content': 0.08732210099697113, 'timestamp': '2025-09-30 22:28:29.775273', 'step': 13396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:29.806747', 'step': 13396, 'epoch': 2} {'type': 'loss', 'content': 0.041463010013103485, 'timestamp': '2025-09-30 22:28:29.824865', 'step': 13397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:29.871338', 'step': 13397, 'epoch': 2} {'type': 'loss', 'content': 0.0861000120639801, 'timestamp': '2025-09-30 22:28:29.877056', 'step': 13398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:29.925275', 'step': 13398, 'epoch': 2} {'type': 'loss', 'content': 0.07891891896724701, 'timestamp': '2025-09-30 22:28:29.931274', 'step': 13399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:29.964852', 'step': 13399, 'epoch': 2} {'type': 'loss', 'content': 0.19201505184173584, 'timestamp': '2025-09-30 22:28:29.990168', 'step': 13400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.020676', 'step': 13400, 'epoch': 2} {'type': 'loss', 'content': 0.13184241950511932, 'timestamp': '2025-09-30 22:28:30.025277', 'step': 13401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:30.057537', 'step': 13401, 'epoch': 2} {'type': 'loss', 'content': 0.12298870831727982, 'timestamp': '2025-09-30 22:28:30.074447', 'step': 13402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:30.106960', 'step': 13402, 'epoch': 2} {'type': 'loss', 'content': 0.03406212478876114, 'timestamp': '2025-09-30 22:28:30.110346', 'step': 13403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:30.141856', 'step': 13403, 'epoch': 2} {'type': 'loss', 'content': 0.06661374866962433, 'timestamp': '2025-09-30 22:28:30.166314', 'step': 13404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:30.206032', 'step': 13404, 'epoch': 2} {'type': 'loss', 'content': 0.19054141640663147, 'timestamp': '2025-09-30 22:28:30.210165', 'step': 13405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:30.243901', 'step': 13405, 'epoch': 2} {'type': 'loss', 'content': 0.0780058354139328, 'timestamp': '2025-09-30 22:28:30.247668', 'step': 13406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:30.280078', 'step': 13406, 'epoch': 2} {'type': 'loss', 'content': 0.0930933803319931, 'timestamp': '2025-09-30 22:28:30.283855', 'step': 13407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.315481', 'step': 13407, 'epoch': 2} {'type': 'loss', 'content': 0.15402166545391083, 'timestamp': '2025-09-30 22:28:30.342502', 'step': 13408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.375350', 'step': 13408, 'epoch': 2} {'type': 'loss', 'content': 0.1001482829451561, 'timestamp': '2025-09-30 22:28:30.381653', 'step': 13409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:30.414021', 'step': 13409, 'epoch': 2} {'type': 'loss', 'content': 0.17045415937900543, 'timestamp': '2025-09-30 22:28:30.418275', 'step': 13410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:30.449677', 'step': 13410, 'epoch': 2} {'type': 'loss', 'content': 0.09105608612298965, 'timestamp': '2025-09-30 22:28:30.457551', 'step': 13411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:30.494299', 'step': 13411, 'epoch': 2} {'type': 'loss', 'content': 0.12211181223392487, 'timestamp': '2025-09-30 22:28:30.518988', 'step': 13412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:30.550919', 'step': 13412, 'epoch': 2} {'type': 'loss', 'content': 0.11382383853197098, 'timestamp': '2025-09-30 22:28:30.554392', 'step': 13413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:30.587792', 'step': 13413, 'epoch': 2} {'type': 'loss', 'content': 0.0794338807463646, 'timestamp': '2025-09-30 22:28:30.590568', 'step': 13414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:30.622019', 'step': 13414, 'epoch': 2} {'type': 'loss', 'content': 0.08592047542333603, 'timestamp': '2025-09-30 22:28:30.626907', 'step': 13415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.669490', 'step': 13415, 'epoch': 2} {'type': 'loss', 'content': 0.061800163239240646, 'timestamp': '2025-09-30 22:28:30.694990', 'step': 13416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.728425', 'step': 13416, 'epoch': 2} {'type': 'loss', 'content': 0.08772509545087814, 'timestamp': '2025-09-30 22:28:30.733825', 'step': 13417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.765553', 'step': 13417, 'epoch': 2} {'type': 'loss', 'content': 0.0959000438451767, 'timestamp': '2025-09-30 22:28:30.769294', 'step': 13418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:30.802791', 'step': 13418, 'epoch': 2} {'type': 'loss', 'content': 0.16378751397132874, 'timestamp': '2025-09-30 22:28:30.806952', 'step': 13419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:30.848860', 'step': 13419, 'epoch': 2} {'type': 'loss', 'content': 0.045767467468976974, 'timestamp': '2025-09-30 22:28:30.877752', 'step': 13420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:30.917818', 'step': 13420, 'epoch': 2} {'type': 'loss', 'content': 0.11276321858167648, 'timestamp': '2025-09-30 22:28:30.929642', 'step': 13421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:30.969825', 'step': 13421, 'epoch': 2} {'type': 'loss', 'content': 0.11109385639429092, 'timestamp': '2025-09-30 22:28:30.974877', 'step': 13422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:31.007038', 'step': 13422, 'epoch': 2} {'type': 'loss', 'content': 0.036698319017887115, 'timestamp': '2025-09-30 22:28:31.010066', 'step': 13423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.041498', 'step': 13423, 'epoch': 2} {'type': 'loss', 'content': 0.05618469417095184, 'timestamp': '2025-09-30 22:28:31.073134', 'step': 13424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.104610', 'step': 13424, 'epoch': 2} {'type': 'loss', 'content': 0.0788404643535614, 'timestamp': '2025-09-30 22:28:31.109151', 'step': 13425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:31.139323', 'step': 13425, 'epoch': 2} {'type': 'loss', 'content': 0.08124435693025589, 'timestamp': '2025-09-30 22:28:31.148358', 'step': 13426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:31.182336', 'step': 13426, 'epoch': 2} {'type': 'loss', 'content': 0.07748955488204956, 'timestamp': '2025-09-30 22:28:31.185251', 'step': 13427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:31.223805', 'step': 13427, 'epoch': 2} {'type': 'loss', 'content': 0.10255720466375351, 'timestamp': '2025-09-30 22:28:31.248212', 'step': 13428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:31.282702', 'step': 13428, 'epoch': 2} {'type': 'loss', 'content': 0.0697854533791542, 'timestamp': '2025-09-30 22:28:31.286576', 'step': 13429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.318765', 'step': 13429, 'epoch': 2} {'type': 'loss', 'content': 0.09545101970434189, 'timestamp': '2025-09-30 22:28:31.322643', 'step': 13430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:31.355205', 'step': 13430, 'epoch': 2} {'type': 'loss', 'content': 0.043674908578395844, 'timestamp': '2025-09-30 22:28:31.359836', 'step': 13431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:28:31.391902', 'step': 13431, 'epoch': 2} {'type': 'loss', 'content': 0.06291061639785767, 'timestamp': '2025-09-30 22:28:31.419859', 'step': 13432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.450694', 'step': 13432, 'epoch': 2} {'type': 'loss', 'content': 0.135258287191391, 'timestamp': '2025-09-30 22:28:31.462619', 'step': 13433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:31.500334', 'step': 13433, 'epoch': 2} {'type': 'loss', 'content': 0.06426259875297546, 'timestamp': '2025-09-30 22:28:31.504495', 'step': 13434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:31.535955', 'step': 13434, 'epoch': 2} {'type': 'loss', 'content': 0.1685476154088974, 'timestamp': '2025-09-30 22:28:31.539297', 'step': 13435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:31.572844', 'step': 13435, 'epoch': 2} {'type': 'loss', 'content': 0.10380134731531143, 'timestamp': '2025-09-30 22:28:31.598381', 'step': 13436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.629798', 'step': 13436, 'epoch': 2} {'type': 'loss', 'content': 0.0901436060667038, 'timestamp': '2025-09-30 22:28:31.633509', 'step': 13437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:31.673477', 'step': 13437, 'epoch': 2} {'type': 'loss', 'content': 0.10927118360996246, 'timestamp': '2025-09-30 22:28:31.683021', 'step': 13438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:31.714864', 'step': 13438, 'epoch': 2} {'type': 'loss', 'content': 0.1515931338071823, 'timestamp': '2025-09-30 22:28:31.719806', 'step': 13439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:28:31.752363', 'step': 13439, 'epoch': 2} {'type': 'loss', 'content': 0.11868410557508469, 'timestamp': '2025-09-30 22:28:31.778047', 'step': 13440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:31.809373', 'step': 13440, 'epoch': 2} {'type': 'loss', 'content': 0.10535972565412521, 'timestamp': '2025-09-30 22:28:31.812865', 'step': 13441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.845590', 'step': 13441, 'epoch': 2} {'type': 'loss', 'content': 0.1298806369304657, 'timestamp': '2025-09-30 22:28:31.855608', 'step': 13442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:31.887517', 'step': 13442, 'epoch': 2} {'type': 'loss', 'content': 0.11900929361581802, 'timestamp': '2025-09-30 22:28:31.895344', 'step': 13443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:31.937070', 'step': 13443, 'epoch': 2} {'type': 'loss', 'content': 0.15252277255058289, 'timestamp': '2025-09-30 22:28:31.962753', 'step': 13444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:31.997645', 'step': 13444, 'epoch': 2} {'type': 'loss', 'content': 0.11209558695554733, 'timestamp': '2025-09-30 22:28:32.003053', 'step': 13445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:32.036903', 'step': 13445, 'epoch': 2} {'type': 'loss', 'content': 0.06087597832083702, 'timestamp': '2025-09-30 22:28:32.046381', 'step': 13446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.079002', 'step': 13446, 'epoch': 2} {'type': 'loss', 'content': 0.06517395377159119, 'timestamp': '2025-09-30 22:28:32.084028', 'step': 13447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:32.116165', 'step': 13447, 'epoch': 2} {'type': 'loss', 'content': 0.11542883515357971, 'timestamp': '2025-09-30 22:28:32.143541', 'step': 13448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:32.176574', 'step': 13448, 'epoch': 2} {'type': 'loss', 'content': 0.08147840201854706, 'timestamp': '2025-09-30 22:28:32.180058', 'step': 13449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.213592', 'step': 13449, 'epoch': 2} {'type': 'loss', 'content': 0.13424041867256165, 'timestamp': '2025-09-30 22:28:32.227193', 'step': 13450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.269854', 'step': 13450, 'epoch': 2} {'type': 'loss', 'content': 0.08994738012552261, 'timestamp': '2025-09-30 22:28:32.279899', 'step': 13451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.320671', 'step': 13451, 'epoch': 2} {'type': 'loss', 'content': 0.04879380017518997, 'timestamp': '2025-09-30 22:28:32.345400', 'step': 13452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:32.379386', 'step': 13452, 'epoch': 2} {'type': 'loss', 'content': 0.06830032169818878, 'timestamp': '2025-09-30 22:28:32.384437', 'step': 13453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.417279', 'step': 13453, 'epoch': 2} {'type': 'loss', 'content': 0.13851706683635712, 'timestamp': '2025-09-30 22:28:32.424532', 'step': 13454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:32.456965', 'step': 13454, 'epoch': 2} {'type': 'loss', 'content': 0.16265471279621124, 'timestamp': '2025-09-30 22:28:32.464874', 'step': 13455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:32.499387', 'step': 13455, 'epoch': 2} {'type': 'loss', 'content': 0.08274196088314056, 'timestamp': '2025-09-30 22:28:32.530521', 'step': 13456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.562889', 'step': 13456, 'epoch': 2} {'type': 'loss', 'content': 0.1178392767906189, 'timestamp': '2025-09-30 22:28:32.566836', 'step': 13457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:32.598583', 'step': 13457, 'epoch': 2} {'type': 'loss', 'content': 0.09669926017522812, 'timestamp': '2025-09-30 22:28:32.602549', 'step': 13458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:32.639178', 'step': 13458, 'epoch': 2} {'type': 'loss', 'content': 0.14747029542922974, 'timestamp': '2025-09-30 22:28:32.642872', 'step': 13459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:32.674380', 'step': 13459, 'epoch': 2} {'type': 'loss', 'content': 0.1749420017004013, 'timestamp': '2025-09-30 22:28:32.700871', 'step': 13460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.731916', 'step': 13460, 'epoch': 2} {'type': 'loss', 'content': 0.11068972945213318, 'timestamp': '2025-09-30 22:28:32.738547', 'step': 13461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:32.779693', 'step': 13461, 'epoch': 2} {'type': 'loss', 'content': 0.09364631026983261, 'timestamp': '2025-09-30 22:28:32.783311', 'step': 13462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.819162', 'step': 13462, 'epoch': 2} {'type': 'loss', 'content': 0.13475365936756134, 'timestamp': '2025-09-30 22:28:32.827579', 'step': 13463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.858133', 'step': 13463, 'epoch': 2} {'type': 'loss', 'content': 0.09480344504117966, 'timestamp': '2025-09-30 22:28:32.886948', 'step': 13464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.917522', 'step': 13464, 'epoch': 2} {'type': 'loss', 'content': 0.11645964533090591, 'timestamp': '2025-09-30 22:28:32.928111', 'step': 13465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:32.967814', 'step': 13465, 'epoch': 2} {'type': 'loss', 'content': 0.07577648013830185, 'timestamp': '2025-09-30 22:28:32.975705', 'step': 13466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.014474', 'step': 13466, 'epoch': 2} {'type': 'loss', 'content': 0.2076030820608139, 'timestamp': '2025-09-30 22:28:33.018914', 'step': 13467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:33.054652', 'step': 13467, 'epoch': 2} {'type': 'loss', 'content': 0.05705533176660538, 'timestamp': '2025-09-30 22:28:33.080512', 'step': 13468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.111633', 'step': 13468, 'epoch': 2} {'type': 'loss', 'content': 0.0893903523683548, 'timestamp': '2025-09-30 22:28:33.116246', 'step': 13469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.148842', 'step': 13469, 'epoch': 2} {'type': 'loss', 'content': 0.1481492668390274, 'timestamp': '2025-09-30 22:28:33.161249', 'step': 13470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.198266', 'step': 13470, 'epoch': 2} {'type': 'loss', 'content': 0.1536361128091812, 'timestamp': '2025-09-30 22:28:33.201497', 'step': 13471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:33.240402', 'step': 13471, 'epoch': 2} {'type': 'loss', 'content': 0.03779253736138344, 'timestamp': '2025-09-30 22:28:33.265682', 'step': 13472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:33.297557', 'step': 13472, 'epoch': 2} {'type': 'loss', 'content': 0.15251952409744263, 'timestamp': '2025-09-30 22:28:33.303448', 'step': 13473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.337999', 'step': 13473, 'epoch': 2} {'type': 'loss', 'content': 0.139394149184227, 'timestamp': '2025-09-30 22:28:33.342511', 'step': 13474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:33.376399', 'step': 13474, 'epoch': 2} {'type': 'loss', 'content': 0.06513053178787231, 'timestamp': '2025-09-30 22:28:33.384003', 'step': 13475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.430174', 'step': 13475, 'epoch': 2} {'type': 'loss', 'content': 0.11014606058597565, 'timestamp': '2025-09-30 22:28:33.454582', 'step': 13476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.497875', 'step': 13476, 'epoch': 2} {'type': 'loss', 'content': 0.093748539686203, 'timestamp': '2025-09-30 22:28:33.505714', 'step': 13477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.556668', 'step': 13477, 'epoch': 2} {'type': 'loss', 'content': 0.04158971086144447, 'timestamp': '2025-09-30 22:28:33.559747', 'step': 13478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.615088', 'step': 13478, 'epoch': 2} {'type': 'loss', 'content': 0.057816363871097565, 'timestamp': '2025-09-30 22:28:33.618120', 'step': 13479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:33.654080', 'step': 13479, 'epoch': 2} {'type': 'loss', 'content': 0.15937386453151703, 'timestamp': '2025-09-30 22:28:33.683654', 'step': 13480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.721104', 'step': 13480, 'epoch': 2} {'type': 'loss', 'content': 0.0956539660692215, 'timestamp': '2025-09-30 22:28:33.725630', 'step': 13481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.761050', 'step': 13481, 'epoch': 2} {'type': 'loss', 'content': 0.07530041038990021, 'timestamp': '2025-09-30 22:28:33.763832', 'step': 13482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.807983', 'step': 13482, 'epoch': 2} {'type': 'loss', 'content': 0.06861570477485657, 'timestamp': '2025-09-30 22:28:33.814684', 'step': 13483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.849076', 'step': 13483, 'epoch': 2} {'type': 'loss', 'content': 0.13046693801879883, 'timestamp': '2025-09-30 22:28:33.875029', 'step': 13484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.922439', 'step': 13484, 'epoch': 2} {'type': 'loss', 'content': 0.064855195581913, 'timestamp': '2025-09-30 22:28:33.926749', 'step': 13485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:33.961474', 'step': 13485, 'epoch': 2} {'type': 'loss', 'content': 0.09484225511550903, 'timestamp': '2025-09-30 22:28:33.964896', 'step': 13486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.002624', 'step': 13486, 'epoch': 2} {'type': 'loss', 'content': 0.06358156353235245, 'timestamp': '2025-09-30 22:28:34.006682', 'step': 13487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.058921', 'step': 13487, 'epoch': 2} {'type': 'loss', 'content': 0.09727702289819717, 'timestamp': '2025-09-30 22:28:34.083712', 'step': 13488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.116668', 'step': 13488, 'epoch': 2} {'type': 'loss', 'content': 0.1271270364522934, 'timestamp': '2025-09-30 22:28:34.121638', 'step': 13489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:34.160595', 'step': 13489, 'epoch': 2} {'type': 'loss', 'content': 0.09727921336889267, 'timestamp': '2025-09-30 22:28:34.168657', 'step': 13490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.207908', 'step': 13490, 'epoch': 2} {'type': 'loss', 'content': 0.0964282974600792, 'timestamp': '2025-09-30 22:28:34.221458', 'step': 13491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:34.262688', 'step': 13491, 'epoch': 2} {'type': 'loss', 'content': 0.08845829963684082, 'timestamp': '2025-09-30 22:28:34.287978', 'step': 13492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.324707', 'step': 13492, 'epoch': 2} {'type': 'loss', 'content': 0.08702477067708969, 'timestamp': '2025-09-30 22:28:34.327322', 'step': 13493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.371110', 'step': 13493, 'epoch': 2} {'type': 'loss', 'content': 0.12105531990528107, 'timestamp': '2025-09-30 22:28:34.379339', 'step': 13494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:34.414862', 'step': 13494, 'epoch': 2} {'type': 'loss', 'content': 0.17879684269428253, 'timestamp': '2025-09-30 22:28:34.420293', 'step': 13495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.467501', 'step': 13495, 'epoch': 2} {'type': 'loss', 'content': 0.1921500265598297, 'timestamp': '2025-09-30 22:28:34.492399', 'step': 13496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:34.532611', 'step': 13496, 'epoch': 2} {'type': 'loss', 'content': 0.08976710587739944, 'timestamp': '2025-09-30 22:28:34.537223', 'step': 13497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:34.581638', 'step': 13497, 'epoch': 2} {'type': 'loss', 'content': 0.09955551475286484, 'timestamp': '2025-09-30 22:28:34.589652', 'step': 13498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.624889', 'step': 13498, 'epoch': 2} {'type': 'loss', 'content': 0.10628281533718109, 'timestamp': '2025-09-30 22:28:34.629149', 'step': 13499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:34.665587', 'step': 13499, 'epoch': 2} {'type': 'loss', 'content': 0.07552649825811386, 'timestamp': '2025-09-30 22:28:34.692197', 'step': 13500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 13500', 'timestamp': '2025-09-30 22:28:39.791255', 'step': 13500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:39.832638', 'step': 13500, 'epoch': 2} {'type': 'loss', 'content': 0.14936839044094086, 'timestamp': '2025-09-30 22:28:39.835866', 'step': 13501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:39.872325', 'step': 13501, 'epoch': 2} {'type': 'loss', 'content': 0.06312372535467148, 'timestamp': '2025-09-30 22:28:39.875901', 'step': 13502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:39.908613', 'step': 13502, 'epoch': 2} {'type': 'loss', 'content': 0.11981021612882614, 'timestamp': '2025-09-30 22:28:39.911972', 'step': 13503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:39.947922', 'step': 13503, 'epoch': 2} {'type': 'loss', 'content': 0.0782884955406189, 'timestamp': '2025-09-30 22:28:39.973084', 'step': 13504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:40.006929', 'step': 13504, 'epoch': 2} {'type': 'loss', 'content': 0.11634659022092819, 'timestamp': '2025-09-30 22:28:40.021898', 'step': 13505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:40.064671', 'step': 13505, 'epoch': 2} {'type': 'loss', 'content': 0.09605270624160767, 'timestamp': '2025-09-30 22:28:40.069488', 'step': 13506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:40.101323', 'step': 13506, 'epoch': 2} {'type': 'loss', 'content': 0.05307634919881821, 'timestamp': '2025-09-30 22:28:40.116627', 'step': 13507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:40.149941', 'step': 13507, 'epoch': 2} {'type': 'loss', 'content': 0.046622611582279205, 'timestamp': '2025-09-30 22:28:40.176495', 'step': 13508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:40.208498', 'step': 13508, 'epoch': 2} {'type': 'loss', 'content': 0.07613330334424973, 'timestamp': '2025-09-30 22:28:40.213565', 'step': 13509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:40.245358', 'step': 13509, 'epoch': 2} {'type': 'loss', 'content': 0.0774378851056099, 'timestamp': '2025-09-30 22:28:40.255599', 'step': 13510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:40.304113', 'step': 13510, 'epoch': 2} {'type': 'loss', 'content': 0.1563260406255722, 'timestamp': '2025-09-30 22:28:40.307988', 'step': 13511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:40.340511', 'step': 13511, 'epoch': 2} {'type': 'loss', 'content': 0.08309914916753769, 'timestamp': '2025-09-30 22:28:40.366035', 'step': 13512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:40.397638', 'step': 13512, 'epoch': 2} {'type': 'loss', 'content': 0.05847864970564842, 'timestamp': '2025-09-30 22:28:40.403856', 'step': 13513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:40.436189', 'step': 13513, 'epoch': 2} {'type': 'loss', 'content': 0.04767764359712601, 'timestamp': '2025-09-30 22:28:40.440443', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:28:48.193859', 'step': 13514, 'epoch': 2} {'type': 'pplx', 'content': 9384.095490400035, 'timestamp': '2025-09-30 22:28:48.197682', 'step': 13514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.228747', 'step': 13514, 'epoch': 2} {'type': 'loss', 'content': 0.1545381098985672, 'timestamp': '2025-09-30 22:28:48.231368', 'step': 13515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:48.262563', 'step': 13515, 'epoch': 2} {'type': 'loss', 'content': 0.10668928921222687, 'timestamp': '2025-09-30 22:28:48.288263', 'step': 13516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:48.319329', 'step': 13516, 'epoch': 2} {'type': 'loss', 'content': 0.053978241980075836, 'timestamp': '2025-09-30 22:28:48.324024', 'step': 13517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:48.356493', 'step': 13517, 'epoch': 2} {'type': 'loss', 'content': 0.11240998655557632, 'timestamp': '2025-09-30 22:28:48.361838', 'step': 13518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.396870', 'step': 13518, 'epoch': 2} {'type': 'loss', 'content': 0.23300111293792725, 'timestamp': '2025-09-30 22:28:48.400524', 'step': 13519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:48.434122', 'step': 13519, 'epoch': 2} {'type': 'loss', 'content': 0.11279239505529404, 'timestamp': '2025-09-30 22:28:48.466540', 'step': 13520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.498852', 'step': 13520, 'epoch': 2} {'type': 'loss', 'content': 0.12911155819892883, 'timestamp': '2025-09-30 22:28:48.503119', 'step': 13521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:48.534291', 'step': 13521, 'epoch': 2} {'type': 'loss', 'content': 0.14887188374996185, 'timestamp': '2025-09-30 22:28:48.545536', 'step': 13522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.583489', 'step': 13522, 'epoch': 2} {'type': 'loss', 'content': 0.02965041622519493, 'timestamp': '2025-09-30 22:28:48.586450', 'step': 13523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:48.618152', 'step': 13523, 'epoch': 2} {'type': 'loss', 'content': 0.05307437479496002, 'timestamp': '2025-09-30 22:28:48.642460', 'step': 13524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.678437', 'step': 13524, 'epoch': 2} {'type': 'loss', 'content': 0.14798420667648315, 'timestamp': '2025-09-30 22:28:48.681561', 'step': 13525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:48.713179', 'step': 13525, 'epoch': 2} {'type': 'loss', 'content': 0.04634569212794304, 'timestamp': '2025-09-30 22:28:48.719751', 'step': 13526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:48.751278', 'step': 13526, 'epoch': 2} {'type': 'loss', 'content': 0.14820489287376404, 'timestamp': '2025-09-30 22:28:48.762963', 'step': 13527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.799397', 'step': 13527, 'epoch': 2} {'type': 'loss', 'content': 0.10372098535299301, 'timestamp': '2025-09-30 22:28:48.824564', 'step': 13528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:48.857160', 'step': 13528, 'epoch': 2} {'type': 'loss', 'content': 0.10936239361763, 'timestamp': '2025-09-30 22:28:48.868520', 'step': 13529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.900626', 'step': 13529, 'epoch': 2} {'type': 'loss', 'content': 0.07726734131574631, 'timestamp': '2025-09-30 22:28:48.905131', 'step': 13530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:48.937496', 'step': 13530, 'epoch': 2} {'type': 'loss', 'content': 0.15029892325401306, 'timestamp': '2025-09-30 22:28:48.940380', 'step': 13531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:48.970766', 'step': 13531, 'epoch': 2} {'type': 'loss', 'content': 0.06635382026433945, 'timestamp': '2025-09-30 22:28:48.996083', 'step': 13532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:49.028164', 'step': 13532, 'epoch': 2} {'type': 'loss', 'content': 0.10993750393390656, 'timestamp': '2025-09-30 22:28:49.031125', 'step': 13533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.069162', 'step': 13533, 'epoch': 2} {'type': 'loss', 'content': 0.09301263839006424, 'timestamp': '2025-09-30 22:28:49.073087', 'step': 13534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.117500', 'step': 13534, 'epoch': 2} {'type': 'loss', 'content': 0.08692371845245361, 'timestamp': '2025-09-30 22:28:49.124004', 'step': 13535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:49.159604', 'step': 13535, 'epoch': 2} {'type': 'loss', 'content': 0.14124375581741333, 'timestamp': '2025-09-30 22:28:49.186389', 'step': 13536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:49.219842', 'step': 13536, 'epoch': 2} {'type': 'loss', 'content': 0.04012981057167053, 'timestamp': '2025-09-30 22:28:49.224637', 'step': 13537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.257624', 'step': 13537, 'epoch': 2} {'type': 'loss', 'content': 0.07169909030199051, 'timestamp': '2025-09-30 22:28:49.261375', 'step': 13538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.300566', 'step': 13538, 'epoch': 2} {'type': 'loss', 'content': 0.12819117307662964, 'timestamp': '2025-09-30 22:28:49.305563', 'step': 13539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.337582', 'step': 13539, 'epoch': 2} {'type': 'loss', 'content': 0.11585035175085068, 'timestamp': '2025-09-30 22:28:49.362509', 'step': 13540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:49.396396', 'step': 13540, 'epoch': 2} {'type': 'loss', 'content': 0.18990465998649597, 'timestamp': '2025-09-30 22:28:49.400882', 'step': 13541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:49.436164', 'step': 13541, 'epoch': 2} {'type': 'loss', 'content': 0.07010678201913834, 'timestamp': '2025-09-30 22:28:49.439401', 'step': 13542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:49.473383', 'step': 13542, 'epoch': 2} {'type': 'loss', 'content': 0.13487443327903748, 'timestamp': '2025-09-30 22:28:49.477360', 'step': 13543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.511155', 'step': 13543, 'epoch': 2} {'type': 'loss', 'content': 0.12390011548995972, 'timestamp': '2025-09-30 22:28:49.536463', 'step': 13544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:49.568249', 'step': 13544, 'epoch': 2} {'type': 'loss', 'content': 0.12299203872680664, 'timestamp': '2025-09-30 22:28:49.582030', 'step': 13545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.621669', 'step': 13545, 'epoch': 2} {'type': 'loss', 'content': 0.1421089768409729, 'timestamp': '2025-09-30 22:28:49.626617', 'step': 13546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:49.657618', 'step': 13546, 'epoch': 2} {'type': 'loss', 'content': 0.21685326099395752, 'timestamp': '2025-09-30 22:28:49.666804', 'step': 13547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:49.706160', 'step': 13547, 'epoch': 2} {'type': 'loss', 'content': 0.14351482689380646, 'timestamp': '2025-09-30 22:28:49.735162', 'step': 13548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.766908', 'step': 13548, 'epoch': 2} {'type': 'loss', 'content': 0.03945938125252724, 'timestamp': '2025-09-30 22:28:49.771583', 'step': 13549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.806443', 'step': 13549, 'epoch': 2} {'type': 'loss', 'content': 0.0866655483841896, 'timestamp': '2025-09-30 22:28:49.810560', 'step': 13550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:49.842166', 'step': 13550, 'epoch': 2} {'type': 'loss', 'content': 0.10173874348402023, 'timestamp': '2025-09-30 22:28:49.846118', 'step': 13551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:49.889840', 'step': 13551, 'epoch': 2} {'type': 'loss', 'content': 0.13666747510433197, 'timestamp': '2025-09-30 22:28:49.921845', 'step': 13552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.955100', 'step': 13552, 'epoch': 2} {'type': 'loss', 'content': 0.13256621360778809, 'timestamp': '2025-09-30 22:28:49.959313', 'step': 13553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:49.994558', 'step': 13553, 'epoch': 2} {'type': 'loss', 'content': 0.1299958974123001, 'timestamp': '2025-09-30 22:28:49.998480', 'step': 13554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.030875', 'step': 13554, 'epoch': 2} {'type': 'loss', 'content': 0.17346414923667908, 'timestamp': '2025-09-30 22:28:50.040396', 'step': 13555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.083751', 'step': 13555, 'epoch': 2} {'type': 'loss', 'content': 0.1683681458234787, 'timestamp': '2025-09-30 22:28:50.110662', 'step': 13556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:50.141084', 'step': 13556, 'epoch': 2} {'type': 'loss', 'content': 0.1425531506538391, 'timestamp': '2025-09-30 22:28:50.143908', 'step': 13557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.184216', 'step': 13557, 'epoch': 2} {'type': 'loss', 'content': 0.09581267833709717, 'timestamp': '2025-09-30 22:28:50.188364', 'step': 13558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:50.229298', 'step': 13558, 'epoch': 2} {'type': 'loss', 'content': 0.09365423023700714, 'timestamp': '2025-09-30 22:28:50.232014', 'step': 13559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:50.262994', 'step': 13559, 'epoch': 2} {'type': 'loss', 'content': 0.09135155379772186, 'timestamp': '2025-09-30 22:28:50.297787', 'step': 13560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:50.329458', 'step': 13560, 'epoch': 2} {'type': 'loss', 'content': 0.0851832702755928, 'timestamp': '2025-09-30 22:28:50.334360', 'step': 13561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.373948', 'step': 13561, 'epoch': 2} {'type': 'loss', 'content': 0.04574723541736603, 'timestamp': '2025-09-30 22:28:50.377743', 'step': 13562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.422746', 'step': 13562, 'epoch': 2} {'type': 'loss', 'content': 0.11983781307935715, 'timestamp': '2025-09-30 22:28:50.425647', 'step': 13563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:50.462884', 'step': 13563, 'epoch': 2} {'type': 'loss', 'content': 0.11453932523727417, 'timestamp': '2025-09-30 22:28:50.488150', 'step': 13564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:50.521400', 'step': 13564, 'epoch': 2} {'type': 'loss', 'content': 0.09738558530807495, 'timestamp': '2025-09-30 22:28:50.525736', 'step': 13565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:50.559327', 'step': 13565, 'epoch': 2} {'type': 'loss', 'content': 0.11699957400560379, 'timestamp': '2025-09-30 22:28:50.563703', 'step': 13566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:50.606906', 'step': 13566, 'epoch': 2} {'type': 'loss', 'content': 0.153725266456604, 'timestamp': '2025-09-30 22:28:50.611087', 'step': 13567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:50.642419', 'step': 13567, 'epoch': 2} {'type': 'loss', 'content': 0.14699168503284454, 'timestamp': '2025-09-30 22:28:50.668340', 'step': 13568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:50.701580', 'step': 13568, 'epoch': 2} {'type': 'loss', 'content': 0.12943197786808014, 'timestamp': '2025-09-30 22:28:50.706268', 'step': 13569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.748281', 'step': 13569, 'epoch': 2} {'type': 'loss', 'content': 0.08421889692544937, 'timestamp': '2025-09-30 22:28:50.752365', 'step': 13570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.786267', 'step': 13570, 'epoch': 2} {'type': 'loss', 'content': 0.10612712800502777, 'timestamp': '2025-09-30 22:28:50.789252', 'step': 13571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:50.820773', 'step': 13571, 'epoch': 2} {'type': 'loss', 'content': 0.12885302305221558, 'timestamp': '2025-09-30 22:28:50.847037', 'step': 13572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.878429', 'step': 13572, 'epoch': 2} {'type': 'loss', 'content': 0.1201423853635788, 'timestamp': '2025-09-30 22:28:50.882125', 'step': 13573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.914707', 'step': 13573, 'epoch': 2} {'type': 'loss', 'content': 0.12289287894964218, 'timestamp': '2025-09-30 22:28:50.923431', 'step': 13574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:50.959658', 'step': 13574, 'epoch': 2} {'type': 'loss', 'content': 0.06624715030193329, 'timestamp': '2025-09-30 22:28:50.964275', 'step': 13575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:50.996202', 'step': 13575, 'epoch': 2} {'type': 'loss', 'content': 0.14189577102661133, 'timestamp': '2025-09-30 22:28:51.021342', 'step': 13576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:51.061637', 'step': 13576, 'epoch': 2} {'type': 'loss', 'content': 0.10969699919223785, 'timestamp': '2025-09-30 22:28:51.064413', 'step': 13577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:51.096994', 'step': 13577, 'epoch': 2} {'type': 'loss', 'content': 0.09693878889083862, 'timestamp': '2025-09-30 22:28:51.107601', 'step': 13578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:51.143336', 'step': 13578, 'epoch': 2} {'type': 'loss', 'content': 0.08287303149700165, 'timestamp': '2025-09-30 22:28:51.157363', 'step': 13579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:51.188904', 'step': 13579, 'epoch': 2} {'type': 'loss', 'content': 0.1454833447933197, 'timestamp': '2025-09-30 22:28:51.213391', 'step': 13580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:51.255209', 'step': 13580, 'epoch': 2} {'type': 'loss', 'content': 0.08267352730035782, 'timestamp': '2025-09-30 22:28:51.258818', 'step': 13581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:51.297054', 'step': 13581, 'epoch': 2} {'type': 'loss', 'content': 0.12747271358966827, 'timestamp': '2025-09-30 22:28:51.310415', 'step': 13582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:51.353187', 'step': 13582, 'epoch': 2} {'type': 'loss', 'content': 0.12547330558300018, 'timestamp': '2025-09-30 22:28:51.358714', 'step': 13583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:51.392928', 'step': 13583, 'epoch': 2} {'type': 'loss', 'content': 0.06597912311553955, 'timestamp': '2025-09-30 22:28:51.418648', 'step': 13584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:51.451391', 'step': 13584, 'epoch': 2} {'type': 'loss', 'content': 0.15334618091583252, 'timestamp': '2025-09-30 22:28:51.455614', 'step': 13585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:51.487670', 'step': 13585, 'epoch': 2} {'type': 'loss', 'content': 0.1414303332567215, 'timestamp': '2025-09-30 22:28:51.490960', 'step': 13586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:51.524450', 'step': 13586, 'epoch': 2} {'type': 'loss', 'content': 0.07176732271909714, 'timestamp': '2025-09-30 22:28:51.527275', 'step': 13587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:51.565323', 'step': 13587, 'epoch': 2} {'type': 'loss', 'content': 0.1415233463048935, 'timestamp': '2025-09-30 22:28:51.594083', 'step': 13588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:51.628939', 'step': 13588, 'epoch': 2} {'type': 'loss', 'content': 0.08046843111515045, 'timestamp': '2025-09-30 22:28:51.636764', 'step': 13589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:51.672753', 'step': 13589, 'epoch': 2} {'type': 'loss', 'content': 0.08236343413591385, 'timestamp': '2025-09-30 22:28:51.681239', 'step': 13590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:51.715838', 'step': 13590, 'epoch': 2} {'type': 'loss', 'content': 0.0923415943980217, 'timestamp': '2025-09-30 22:28:51.718929', 'step': 13591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:51.771109', 'step': 13591, 'epoch': 2} {'type': 'loss', 'content': 0.19832491874694824, 'timestamp': '2025-09-30 22:28:51.799622', 'step': 13592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:51.834404', 'step': 13592, 'epoch': 2} {'type': 'loss', 'content': 0.1376347541809082, 'timestamp': '2025-09-30 22:28:51.837965', 'step': 13593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:51.874520', 'step': 13593, 'epoch': 2} {'type': 'loss', 'content': 0.06686775386333466, 'timestamp': '2025-09-30 22:28:51.879067', 'step': 13594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:51.913676', 'step': 13594, 'epoch': 2} {'type': 'loss', 'content': 0.15482810139656067, 'timestamp': '2025-09-30 22:28:51.917476', 'step': 13595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:51.950557', 'step': 13595, 'epoch': 2} {'type': 'loss', 'content': 0.07926534116268158, 'timestamp': '2025-09-30 22:28:51.975946', 'step': 13596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:52.008104', 'step': 13596, 'epoch': 2} {'type': 'loss', 'content': 0.12440424412488937, 'timestamp': '2025-09-30 22:28:52.013367', 'step': 13597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.047829', 'step': 13597, 'epoch': 2} {'type': 'loss', 'content': 0.22257359325885773, 'timestamp': '2025-09-30 22:28:52.051056', 'step': 13598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.083317', 'step': 13598, 'epoch': 2} {'type': 'loss', 'content': 0.17512257397174835, 'timestamp': '2025-09-30 22:28:52.089004', 'step': 13599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.120954', 'step': 13599, 'epoch': 2} {'type': 'loss', 'content': 0.09018280357122421, 'timestamp': '2025-09-30 22:28:52.146066', 'step': 13600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.178757', 'step': 13600, 'epoch': 2} {'type': 'loss', 'content': 0.07255490124225616, 'timestamp': '2025-09-30 22:28:52.182502', 'step': 13601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.216273', 'step': 13601, 'epoch': 2} {'type': 'loss', 'content': 0.15031522512435913, 'timestamp': '2025-09-30 22:28:52.218803', 'step': 13602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.254351', 'step': 13602, 'epoch': 2} {'type': 'loss', 'content': 0.07090668380260468, 'timestamp': '2025-09-30 22:28:52.263501', 'step': 13603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.297162', 'step': 13603, 'epoch': 2} {'type': 'loss', 'content': 0.06365133076906204, 'timestamp': '2025-09-30 22:28:52.322645', 'step': 13604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.353772', 'step': 13604, 'epoch': 2} {'type': 'loss', 'content': 0.18142329156398773, 'timestamp': '2025-09-30 22:28:52.365944', 'step': 13605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.401839', 'step': 13605, 'epoch': 2} {'type': 'loss', 'content': 0.08896234631538391, 'timestamp': '2025-09-30 22:28:52.405914', 'step': 13606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.436813', 'step': 13606, 'epoch': 2} {'type': 'loss', 'content': 0.12196607142686844, 'timestamp': '2025-09-30 22:28:52.439803', 'step': 13607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.472274', 'step': 13607, 'epoch': 2} {'type': 'loss', 'content': 0.09335213899612427, 'timestamp': '2025-09-30 22:28:52.498510', 'step': 13608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.540484', 'step': 13608, 'epoch': 2} {'type': 'loss', 'content': 0.11215971410274506, 'timestamp': '2025-09-30 22:28:52.556041', 'step': 13609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:52.593168', 'step': 13609, 'epoch': 2} {'type': 'loss', 'content': 0.10410066694021225, 'timestamp': '2025-09-30 22:28:52.596823', 'step': 13610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.634018', 'step': 13610, 'epoch': 2} {'type': 'loss', 'content': 0.0864270031452179, 'timestamp': '2025-09-30 22:28:52.636765', 'step': 13611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:52.671322', 'step': 13611, 'epoch': 2} {'type': 'loss', 'content': 0.07282894849777222, 'timestamp': '2025-09-30 22:28:52.697745', 'step': 13612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.728785', 'step': 13612, 'epoch': 2} {'type': 'loss', 'content': 0.1872619092464447, 'timestamp': '2025-09-30 22:28:52.732749', 'step': 13613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:52.764499', 'step': 13613, 'epoch': 2} {'type': 'loss', 'content': 0.10071063786745071, 'timestamp': '2025-09-30 22:28:52.778798', 'step': 13614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:52.819297', 'step': 13614, 'epoch': 2} {'type': 'loss', 'content': 0.12548480927944183, 'timestamp': '2025-09-30 22:28:52.823621', 'step': 13615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:52.854847', 'step': 13615, 'epoch': 2} {'type': 'loss', 'content': 0.1720820516347885, 'timestamp': '2025-09-30 22:28:52.880105', 'step': 13616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:52.913538', 'step': 13616, 'epoch': 2} {'type': 'loss', 'content': 0.11042091250419617, 'timestamp': '2025-09-30 22:28:52.931110', 'step': 13617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:52.963384', 'step': 13617, 'epoch': 2} {'type': 'loss', 'content': 0.201582133769989, 'timestamp': '2025-09-30 22:28:52.971381', 'step': 13618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.006446', 'step': 13618, 'epoch': 2} {'type': 'loss', 'content': 0.09684333950281143, 'timestamp': '2025-09-30 22:28:53.009714', 'step': 13619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.048562', 'step': 13619, 'epoch': 2} {'type': 'loss', 'content': 0.04769900068640709, 'timestamp': '2025-09-30 22:28:53.073606', 'step': 13620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.112699', 'step': 13620, 'epoch': 2} {'type': 'loss', 'content': 0.06809192150831223, 'timestamp': '2025-09-30 22:28:53.116979', 'step': 13621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.148702', 'step': 13621, 'epoch': 2} {'type': 'loss', 'content': 0.12212517857551575, 'timestamp': '2025-09-30 22:28:53.152532', 'step': 13622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.185106', 'step': 13622, 'epoch': 2} {'type': 'loss', 'content': 0.07760336250066757, 'timestamp': '2025-09-30 22:28:53.193279', 'step': 13623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.232203', 'step': 13623, 'epoch': 2} {'type': 'loss', 'content': 0.10323973000049591, 'timestamp': '2025-09-30 22:28:53.257240', 'step': 13624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.290319', 'step': 13624, 'epoch': 2} {'type': 'loss', 'content': 0.09953320771455765, 'timestamp': '2025-09-30 22:28:53.296112', 'step': 13625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.327957', 'step': 13625, 'epoch': 2} {'type': 'loss', 'content': 0.06748176366090775, 'timestamp': '2025-09-30 22:28:53.333718', 'step': 13626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.370880', 'step': 13626, 'epoch': 2} {'type': 'loss', 'content': 0.1156160980463028, 'timestamp': '2025-09-30 22:28:53.376015', 'step': 13627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:53.408273', 'step': 13627, 'epoch': 2} {'type': 'loss', 'content': 0.07614041119813919, 'timestamp': '2025-09-30 22:28:53.433145', 'step': 13628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.464671', 'step': 13628, 'epoch': 2} {'type': 'loss', 'content': 0.08238855004310608, 'timestamp': '2025-09-30 22:28:53.467283', 'step': 13629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.501381', 'step': 13629, 'epoch': 2} {'type': 'loss', 'content': 0.07026153802871704, 'timestamp': '2025-09-30 22:28:53.504396', 'step': 13630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:53.536559', 'step': 13630, 'epoch': 2} {'type': 'loss', 'content': 0.10369376093149185, 'timestamp': '2025-09-30 22:28:53.543749', 'step': 13631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.580073', 'step': 13631, 'epoch': 2} {'type': 'loss', 'content': 0.09004012495279312, 'timestamp': '2025-09-30 22:28:53.606866', 'step': 13632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.641457', 'step': 13632, 'epoch': 2} {'type': 'loss', 'content': 0.03764002025127411, 'timestamp': '2025-09-30 22:28:53.650283', 'step': 13633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.685529', 'step': 13633, 'epoch': 2} {'type': 'loss', 'content': 0.16014134883880615, 'timestamp': '2025-09-30 22:28:53.689536', 'step': 13634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.720709', 'step': 13634, 'epoch': 2} {'type': 'loss', 'content': 0.0908709168434143, 'timestamp': '2025-09-30 22:28:53.725145', 'step': 13635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.756507', 'step': 13635, 'epoch': 2} {'type': 'loss', 'content': 0.09037628024816513, 'timestamp': '2025-09-30 22:28:53.785766', 'step': 13636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.817898', 'step': 13636, 'epoch': 2} {'type': 'loss', 'content': 0.10784915089607239, 'timestamp': '2025-09-30 22:28:53.836677', 'step': 13637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:53.871952', 'step': 13637, 'epoch': 2} {'type': 'loss', 'content': 0.12732157111167908, 'timestamp': '2025-09-30 22:28:53.880572', 'step': 13638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.925790', 'step': 13638, 'epoch': 2} {'type': 'loss', 'content': 0.07763221859931946, 'timestamp': '2025-09-30 22:28:53.930166', 'step': 13639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:53.969153', 'step': 13639, 'epoch': 2} {'type': 'loss', 'content': 0.16222698986530304, 'timestamp': '2025-09-30 22:28:54.003839', 'step': 13640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:54.035168', 'step': 13640, 'epoch': 2} {'type': 'loss', 'content': 0.06478717178106308, 'timestamp': '2025-09-30 22:28:54.053392', 'step': 13641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:54.093044', 'step': 13641, 'epoch': 2} {'type': 'loss', 'content': 0.09309816360473633, 'timestamp': '2025-09-30 22:28:54.098496', 'step': 13642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.130807', 'step': 13642, 'epoch': 2} {'type': 'loss', 'content': 0.15407666563987732, 'timestamp': '2025-09-30 22:28:54.135500', 'step': 13643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.166135', 'step': 13643, 'epoch': 2} {'type': 'loss', 'content': 0.10652489215135574, 'timestamp': '2025-09-30 22:28:54.192489', 'step': 13644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:54.227775', 'step': 13644, 'epoch': 2} {'type': 'loss', 'content': 0.09945344924926758, 'timestamp': '2025-09-30 22:28:54.233039', 'step': 13645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:54.265083', 'step': 13645, 'epoch': 2} {'type': 'loss', 'content': 0.06745190173387527, 'timestamp': '2025-09-30 22:28:54.267973', 'step': 13646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.304822', 'step': 13646, 'epoch': 2} {'type': 'loss', 'content': 0.11250351369380951, 'timestamp': '2025-09-30 22:28:54.308847', 'step': 13647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.344624', 'step': 13647, 'epoch': 2} {'type': 'loss', 'content': 0.06891745328903198, 'timestamp': '2025-09-30 22:28:54.384533', 'step': 13648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:54.415960', 'step': 13648, 'epoch': 2} {'type': 'loss', 'content': 0.03331064432859421, 'timestamp': '2025-09-30 22:28:54.425713', 'step': 13649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:54.457765', 'step': 13649, 'epoch': 2} {'type': 'loss', 'content': 0.16925907135009766, 'timestamp': '2025-09-30 22:28:54.463527', 'step': 13650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:54.496951', 'step': 13650, 'epoch': 2} {'type': 'loss', 'content': 0.05341453105211258, 'timestamp': '2025-09-30 22:28:54.501094', 'step': 13651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:54.533228', 'step': 13651, 'epoch': 2} {'type': 'loss', 'content': 0.18339833617210388, 'timestamp': '2025-09-30 22:28:54.558289', 'step': 13652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:54.591174', 'step': 13652, 'epoch': 2} {'type': 'loss', 'content': 0.14913645386695862, 'timestamp': '2025-09-30 22:28:54.596352', 'step': 13653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.629282', 'step': 13653, 'epoch': 2} {'type': 'loss', 'content': 0.18229198455810547, 'timestamp': '2025-09-30 22:28:54.633067', 'step': 13654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:54.669871', 'step': 13654, 'epoch': 2} {'type': 'loss', 'content': 0.14734415709972382, 'timestamp': '2025-09-30 22:28:54.674540', 'step': 13655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.707366', 'step': 13655, 'epoch': 2} {'type': 'loss', 'content': 0.07728976756334305, 'timestamp': '2025-09-30 22:28:54.736017', 'step': 13656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.767592', 'step': 13656, 'epoch': 2} {'type': 'loss', 'content': 0.12349703907966614, 'timestamp': '2025-09-30 22:28:54.772587', 'step': 13657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:54.817897', 'step': 13657, 'epoch': 2} {'type': 'loss', 'content': 0.07189085334539413, 'timestamp': '2025-09-30 22:28:54.821624', 'step': 13658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.855178', 'step': 13658, 'epoch': 2} {'type': 'loss', 'content': 0.06613869220018387, 'timestamp': '2025-09-30 22:28:54.859294', 'step': 13659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:54.890146', 'step': 13659, 'epoch': 2} {'type': 'loss', 'content': 0.05728621408343315, 'timestamp': '2025-09-30 22:28:54.914906', 'step': 13660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:54.951845', 'step': 13660, 'epoch': 2} {'type': 'loss', 'content': 0.21641261875629425, 'timestamp': '2025-09-30 22:28:54.956822', 'step': 13661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:54.990467', 'step': 13661, 'epoch': 2} {'type': 'loss', 'content': 0.09443149715662003, 'timestamp': '2025-09-30 22:28:54.998064', 'step': 13662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.034691', 'step': 13662, 'epoch': 2} {'type': 'loss', 'content': 0.08893539011478424, 'timestamp': '2025-09-30 22:28:55.041234', 'step': 13663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:55.072863', 'step': 13663, 'epoch': 2} {'type': 'loss', 'content': 0.11983466893434525, 'timestamp': '2025-09-30 22:28:55.098768', 'step': 13664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:55.129833', 'step': 13664, 'epoch': 2} {'type': 'loss', 'content': 0.08812541514635086, 'timestamp': '2025-09-30 22:28:55.133030', 'step': 13665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:55.164268', 'step': 13665, 'epoch': 2} {'type': 'loss', 'content': 0.23140041530132294, 'timestamp': '2025-09-30 22:28:55.168360', 'step': 13666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.202929', 'step': 13666, 'epoch': 2} {'type': 'loss', 'content': 0.09505137801170349, 'timestamp': '2025-09-30 22:28:55.207741', 'step': 13667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:55.243326', 'step': 13667, 'epoch': 2} {'type': 'loss', 'content': 0.10898341983556747, 'timestamp': '2025-09-30 22:28:55.276444', 'step': 13668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:55.309025', 'step': 13668, 'epoch': 2} {'type': 'loss', 'content': 0.1393517106771469, 'timestamp': '2025-09-30 22:28:55.313792', 'step': 13669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:55.347324', 'step': 13669, 'epoch': 2} {'type': 'loss', 'content': 0.1407126635313034, 'timestamp': '2025-09-30 22:28:55.352556', 'step': 13670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.391146', 'step': 13670, 'epoch': 2} {'type': 'loss', 'content': 0.14573127031326294, 'timestamp': '2025-09-30 22:28:55.396179', 'step': 13671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:55.439187', 'step': 13671, 'epoch': 2} {'type': 'loss', 'content': 0.10450606793165207, 'timestamp': '2025-09-30 22:28:55.464443', 'step': 13672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.503829', 'step': 13672, 'epoch': 2} {'type': 'loss', 'content': 0.1352936327457428, 'timestamp': '2025-09-30 22:28:55.511587', 'step': 13673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:55.543854', 'step': 13673, 'epoch': 2} {'type': 'loss', 'content': 0.09509143978357315, 'timestamp': '2025-09-30 22:28:55.549025', 'step': 13674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.581419', 'step': 13674, 'epoch': 2} {'type': 'loss', 'content': 0.04972722753882408, 'timestamp': '2025-09-30 22:28:55.595274', 'step': 13675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:55.633570', 'step': 13675, 'epoch': 2} {'type': 'loss', 'content': 0.06296946108341217, 'timestamp': '2025-09-30 22:28:55.658279', 'step': 13676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.690864', 'step': 13676, 'epoch': 2} {'type': 'loss', 'content': 0.0863298550248146, 'timestamp': '2025-09-30 22:28:55.694211', 'step': 13677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:55.734516', 'step': 13677, 'epoch': 2} {'type': 'loss', 'content': 0.1675722897052765, 'timestamp': '2025-09-30 22:28:55.738423', 'step': 13678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:55.769717', 'step': 13678, 'epoch': 2} {'type': 'loss', 'content': 0.17625753581523895, 'timestamp': '2025-09-30 22:28:55.773621', 'step': 13679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:55.805679', 'step': 13679, 'epoch': 2} {'type': 'loss', 'content': 0.11009642481803894, 'timestamp': '2025-09-30 22:28:55.830994', 'step': 13680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.862177', 'step': 13680, 'epoch': 2} {'type': 'loss', 'content': 0.07870012521743774, 'timestamp': '2025-09-30 22:28:55.867466', 'step': 13681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:55.898938', 'step': 13681, 'epoch': 2} {'type': 'loss', 'content': 0.08647800981998444, 'timestamp': '2025-09-30 22:28:55.903414', 'step': 13682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.939583', 'step': 13682, 'epoch': 2} {'type': 'loss', 'content': 0.07764428853988647, 'timestamp': '2025-09-30 22:28:55.948060', 'step': 13683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:55.992648', 'step': 13683, 'epoch': 2} {'type': 'loss', 'content': 0.09221695363521576, 'timestamp': '2025-09-30 22:28:56.018405', 'step': 13684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.063509', 'step': 13684, 'epoch': 2} {'type': 'loss', 'content': 0.03173283487558365, 'timestamp': '2025-09-30 22:28:56.068509', 'step': 13685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.103597', 'step': 13685, 'epoch': 2} {'type': 'loss', 'content': 0.0927472785115242, 'timestamp': '2025-09-30 22:28:56.109769', 'step': 13686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.151011', 'step': 13686, 'epoch': 2} {'type': 'loss', 'content': 0.15300734341144562, 'timestamp': '2025-09-30 22:28:56.154301', 'step': 13687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.186164', 'step': 13687, 'epoch': 2} {'type': 'loss', 'content': 0.18338371813297272, 'timestamp': '2025-09-30 22:28:56.225041', 'step': 13688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.255647', 'step': 13688, 'epoch': 2} {'type': 'loss', 'content': 0.06468658894300461, 'timestamp': '2025-09-30 22:28:56.263870', 'step': 13689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.305563', 'step': 13689, 'epoch': 2} {'type': 'loss', 'content': 0.10603830218315125, 'timestamp': '2025-09-30 22:28:56.314298', 'step': 13690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:56.351873', 'step': 13690, 'epoch': 2} {'type': 'loss', 'content': 0.062314894050359726, 'timestamp': '2025-09-30 22:28:56.360352', 'step': 13691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.398524', 'step': 13691, 'epoch': 2} {'type': 'loss', 'content': 0.19614814221858978, 'timestamp': '2025-09-30 22:28:56.423294', 'step': 13692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.456847', 'step': 13692, 'epoch': 2} {'type': 'loss', 'content': 0.07027270644903183, 'timestamp': '2025-09-30 22:28:56.465333', 'step': 13693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.509168', 'step': 13693, 'epoch': 2} {'type': 'loss', 'content': 0.11720229685306549, 'timestamp': '2025-09-30 22:28:56.511950', 'step': 13694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:56.548385', 'step': 13694, 'epoch': 2} {'type': 'loss', 'content': 0.1092652678489685, 'timestamp': '2025-09-30 22:28:56.552394', 'step': 13695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.583095', 'step': 13695, 'epoch': 2} {'type': 'loss', 'content': 0.09500640630722046, 'timestamp': '2025-09-30 22:28:56.608342', 'step': 13696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.641878', 'step': 13696, 'epoch': 2} {'type': 'loss', 'content': 0.10564399510622025, 'timestamp': '2025-09-30 22:28:56.645281', 'step': 13697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:56.678810', 'step': 13697, 'epoch': 2} {'type': 'loss', 'content': 0.06206515058875084, 'timestamp': '2025-09-30 22:28:56.684032', 'step': 13698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:56.719336', 'step': 13698, 'epoch': 2} {'type': 'loss', 'content': 0.15426890552043915, 'timestamp': '2025-09-30 22:28:56.729376', 'step': 13699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.763171', 'step': 13699, 'epoch': 2} {'type': 'loss', 'content': 0.1387680470943451, 'timestamp': '2025-09-30 22:28:56.794059', 'step': 13700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:56.832562', 'step': 13700, 'epoch': 2} {'type': 'loss', 'content': 0.0806795284152031, 'timestamp': '2025-09-30 22:28:56.836796', 'step': 13701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:56.869072', 'step': 13701, 'epoch': 2} {'type': 'loss', 'content': 0.19410325586795807, 'timestamp': '2025-09-30 22:28:56.873522', 'step': 13702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:56.906745', 'step': 13702, 'epoch': 2} {'type': 'loss', 'content': 0.08187734335660934, 'timestamp': '2025-09-30 22:28:56.910855', 'step': 13703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:56.942387', 'step': 13703, 'epoch': 2} {'type': 'loss', 'content': 0.08430067449808121, 'timestamp': '2025-09-30 22:28:56.970116', 'step': 13704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:57.002593', 'step': 13704, 'epoch': 2} {'type': 'loss', 'content': 0.03085925616323948, 'timestamp': '2025-09-30 22:28:57.007309', 'step': 13705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:57.039092', 'step': 13705, 'epoch': 2} {'type': 'loss', 'content': 0.07572481781244278, 'timestamp': '2025-09-30 22:28:57.042179', 'step': 13706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:57.080606', 'step': 13706, 'epoch': 2} {'type': 'loss', 'content': 0.11158010363578796, 'timestamp': '2025-09-30 22:28:57.092836', 'step': 13707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.133986', 'step': 13707, 'epoch': 2} {'type': 'loss', 'content': 0.06193353980779648, 'timestamp': '2025-09-30 22:28:57.159083', 'step': 13708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:57.196758', 'step': 13708, 'epoch': 2} {'type': 'loss', 'content': 0.08841259777545929, 'timestamp': '2025-09-30 22:28:57.200213', 'step': 13709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.232829', 'step': 13709, 'epoch': 2} {'type': 'loss', 'content': 0.1507745385169983, 'timestamp': '2025-09-30 22:28:57.236394', 'step': 13710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.267336', 'step': 13710, 'epoch': 2} {'type': 'loss', 'content': 0.06738198548555374, 'timestamp': '2025-09-30 22:28:57.272718', 'step': 13711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:57.314442', 'step': 13711, 'epoch': 2} {'type': 'loss', 'content': 0.07740343362092972, 'timestamp': '2025-09-30 22:28:57.342541', 'step': 13712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:57.374785', 'step': 13712, 'epoch': 2} {'type': 'loss', 'content': 0.05242443457245827, 'timestamp': '2025-09-30 22:28:57.380676', 'step': 13713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.414258', 'step': 13713, 'epoch': 2} {'type': 'loss', 'content': 0.20539267361164093, 'timestamp': '2025-09-30 22:28:57.417645', 'step': 13714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:57.449253', 'step': 13714, 'epoch': 2} {'type': 'loss', 'content': 0.13311345875263214, 'timestamp': '2025-09-30 22:28:57.453686', 'step': 13715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:57.486771', 'step': 13715, 'epoch': 2} {'type': 'loss', 'content': 0.089052215218544, 'timestamp': '2025-09-30 22:28:57.512610', 'step': 13716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:57.543092', 'step': 13716, 'epoch': 2} {'type': 'loss', 'content': 0.09777822345495224, 'timestamp': '2025-09-30 22:28:57.546920', 'step': 13717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:57.578771', 'step': 13717, 'epoch': 2} {'type': 'loss', 'content': 0.11575593799352646, 'timestamp': '2025-09-30 22:28:57.583233', 'step': 13718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:28:57.616775', 'step': 13718, 'epoch': 2} {'type': 'loss', 'content': 0.12740568816661835, 'timestamp': '2025-09-30 22:28:57.622786', 'step': 13719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.655165', 'step': 13719, 'epoch': 2} {'type': 'loss', 'content': 0.08586401492357254, 'timestamp': '2025-09-30 22:28:57.679313', 'step': 13720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.711163', 'step': 13720, 'epoch': 2} {'type': 'loss', 'content': 0.03932815417647362, 'timestamp': '2025-09-30 22:28:57.716123', 'step': 13721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.749345', 'step': 13721, 'epoch': 2} {'type': 'loss', 'content': 0.04448969289660454, 'timestamp': '2025-09-30 22:28:57.754080', 'step': 13722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:57.784053', 'step': 13722, 'epoch': 2} {'type': 'loss', 'content': 0.07690183818340302, 'timestamp': '2025-09-30 22:28:57.787521', 'step': 13723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:57.817844', 'step': 13723, 'epoch': 2} {'type': 'loss', 'content': 0.12400887161493301, 'timestamp': '2025-09-30 22:28:57.844035', 'step': 13724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.876279', 'step': 13724, 'epoch': 2} {'type': 'loss', 'content': 0.1200079396367073, 'timestamp': '2025-09-30 22:28:57.879121', 'step': 13725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:57.915485', 'step': 13725, 'epoch': 2} {'type': 'loss', 'content': 0.11773968487977982, 'timestamp': '2025-09-30 22:28:57.921081', 'step': 13726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.952264', 'step': 13726, 'epoch': 2} {'type': 'loss', 'content': 0.0954165905714035, 'timestamp': '2025-09-30 22:28:57.960056', 'step': 13727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:57.991765', 'step': 13727, 'epoch': 2} {'type': 'loss', 'content': 0.10737159848213196, 'timestamp': '2025-09-30 22:28:58.017410', 'step': 13728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.053897', 'step': 13728, 'epoch': 2} {'type': 'loss', 'content': 0.1336759477853775, 'timestamp': '2025-09-30 22:28:58.058005', 'step': 13729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:58.100054', 'step': 13729, 'epoch': 2} {'type': 'loss', 'content': 0.09120900928974152, 'timestamp': '2025-09-30 22:28:58.103398', 'step': 13730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:58.134337', 'step': 13730, 'epoch': 2} {'type': 'loss', 'content': 0.11054233461618423, 'timestamp': '2025-09-30 22:28:58.139625', 'step': 13731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.172959', 'step': 13731, 'epoch': 2} {'type': 'loss', 'content': 0.09750447422266006, 'timestamp': '2025-09-30 22:28:58.199028', 'step': 13732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.231179', 'step': 13732, 'epoch': 2} {'type': 'loss', 'content': 0.0570610836148262, 'timestamp': '2025-09-30 22:28:58.234152', 'step': 13733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:58.266523', 'step': 13733, 'epoch': 2} {'type': 'loss', 'content': 0.12154944241046906, 'timestamp': '2025-09-30 22:28:58.272051', 'step': 13734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:58.305390', 'step': 13734, 'epoch': 2} {'type': 'loss', 'content': 0.0976969376206398, 'timestamp': '2025-09-30 22:28:58.315205', 'step': 13735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.353129', 'step': 13735, 'epoch': 2} {'type': 'loss', 'content': 0.19522039592266083, 'timestamp': '2025-09-30 22:28:58.378762', 'step': 13736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.412992', 'step': 13736, 'epoch': 2} {'type': 'loss', 'content': 0.04521654546260834, 'timestamp': '2025-09-30 22:28:58.417884', 'step': 13737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.454790', 'step': 13737, 'epoch': 2} {'type': 'loss', 'content': 0.17493507266044617, 'timestamp': '2025-09-30 22:28:58.458950', 'step': 13738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.490637', 'step': 13738, 'epoch': 2} {'type': 'loss', 'content': 0.14599817991256714, 'timestamp': '2025-09-30 22:28:58.494330', 'step': 13739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:58.528347', 'step': 13739, 'epoch': 2} {'type': 'loss', 'content': 0.09730557352304459, 'timestamp': '2025-09-30 22:28:58.552764', 'step': 13740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.584159', 'step': 13740, 'epoch': 2} {'type': 'loss', 'content': 0.1304924190044403, 'timestamp': '2025-09-30 22:28:58.587753', 'step': 13741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.625232', 'step': 13741, 'epoch': 2} {'type': 'loss', 'content': 0.03169011324644089, 'timestamp': '2025-09-30 22:28:58.630275', 'step': 13742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.670241', 'step': 13742, 'epoch': 2} {'type': 'loss', 'content': 0.2236335724592209, 'timestamp': '2025-09-30 22:28:58.676637', 'step': 13743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:58.710953', 'step': 13743, 'epoch': 2} {'type': 'loss', 'content': 0.08883039653301239, 'timestamp': '2025-09-30 22:28:58.736291', 'step': 13744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.773050', 'step': 13744, 'epoch': 2} {'type': 'loss', 'content': 0.12559810280799866, 'timestamp': '2025-09-30 22:28:58.778341', 'step': 13745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:58.813733', 'step': 13745, 'epoch': 2} {'type': 'loss', 'content': 0.08635614067316055, 'timestamp': '2025-09-30 22:28:58.817271', 'step': 13746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.848690', 'step': 13746, 'epoch': 2} {'type': 'loss', 'content': 0.11785512417554855, 'timestamp': '2025-09-30 22:28:58.853834', 'step': 13747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:58.888163', 'step': 13747, 'epoch': 2} {'type': 'loss', 'content': 0.1405544877052307, 'timestamp': '2025-09-30 22:28:58.914202', 'step': 13748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.946721', 'step': 13748, 'epoch': 2} {'type': 'loss', 'content': 0.09515539556741714, 'timestamp': '2025-09-30 22:28:58.952056', 'step': 13749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:58.984727', 'step': 13749, 'epoch': 2} {'type': 'loss', 'content': 0.11170926690101624, 'timestamp': '2025-09-30 22:28:58.987993', 'step': 13750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:59.021527', 'step': 13750, 'epoch': 2} {'type': 'loss', 'content': 0.13689769804477692, 'timestamp': '2025-09-30 22:28:59.026787', 'step': 13751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:59.059020', 'step': 13751, 'epoch': 2} {'type': 'loss', 'content': 0.16966697573661804, 'timestamp': '2025-09-30 22:28:59.083949', 'step': 13752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:28:59.119432', 'step': 13752, 'epoch': 2} {'type': 'loss', 'content': 0.07381802797317505, 'timestamp': '2025-09-30 22:28:59.121773', 'step': 13753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:59.154657', 'step': 13753, 'epoch': 2} {'type': 'loss', 'content': 0.23759829998016357, 'timestamp': '2025-09-30 22:28:59.169631', 'step': 13754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:59.203149', 'step': 13754, 'epoch': 2} {'type': 'loss', 'content': 0.16824160516262054, 'timestamp': '2025-09-30 22:28:59.208141', 'step': 13755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:59.239684', 'step': 13755, 'epoch': 2} {'type': 'loss', 'content': 0.07397732138633728, 'timestamp': '2025-09-30 22:28:59.265681', 'step': 13756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:59.307334', 'step': 13756, 'epoch': 2} {'type': 'loss', 'content': 0.07500648498535156, 'timestamp': '2025-09-30 22:28:59.320699', 'step': 13757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:59.360904', 'step': 13757, 'epoch': 2} {'type': 'loss', 'content': 0.12004733085632324, 'timestamp': '2025-09-30 22:28:59.364134', 'step': 13758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:59.396002', 'step': 13758, 'epoch': 2} {'type': 'loss', 'content': 0.10355039685964584, 'timestamp': '2025-09-30 22:28:59.399427', 'step': 13759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:59.442264', 'step': 13759, 'epoch': 2} {'type': 'loss', 'content': 0.1890285462141037, 'timestamp': '2025-09-30 22:28:59.469696', 'step': 13760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:59.505933', 'step': 13760, 'epoch': 2} {'type': 'loss', 'content': 0.10793793201446533, 'timestamp': '2025-09-30 22:28:59.509984', 'step': 13761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:59.542424', 'step': 13761, 'epoch': 2} {'type': 'loss', 'content': 0.09042235463857651, 'timestamp': '2025-09-30 22:28:59.546688', 'step': 13762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:28:59.578731', 'step': 13762, 'epoch': 2} {'type': 'loss', 'content': 0.10494494438171387, 'timestamp': '2025-09-30 22:28:59.581633', 'step': 13763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:59.622301', 'step': 13763, 'epoch': 2} {'type': 'loss', 'content': 0.09770634025335312, 'timestamp': '2025-09-30 22:28:59.649391', 'step': 13764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:28:59.681554', 'step': 13764, 'epoch': 2} {'type': 'loss', 'content': 0.10542815923690796, 'timestamp': '2025-09-30 22:28:59.686841', 'step': 13765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:59.719418', 'step': 13765, 'epoch': 2} {'type': 'loss', 'content': 0.09170948714017868, 'timestamp': '2025-09-30 22:28:59.725145', 'step': 13766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:59.762739', 'step': 13766, 'epoch': 2} {'type': 'loss', 'content': 0.20368355512619019, 'timestamp': '2025-09-30 22:28:59.771252', 'step': 13767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:28:59.822043', 'step': 13767, 'epoch': 2} {'type': 'loss', 'content': 0.043012846261262894, 'timestamp': '2025-09-30 22:28:59.848575', 'step': 13768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:28:59.881382', 'step': 13768, 'epoch': 2} {'type': 'loss', 'content': 0.09521917253732681, 'timestamp': '2025-09-30 22:28:59.884920', 'step': 13769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:28:59.918044', 'step': 13769, 'epoch': 2} {'type': 'loss', 'content': 0.09941256046295166, 'timestamp': '2025-09-30 22:28:59.923583', 'step': 13770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:28:59.961090', 'step': 13770, 'epoch': 2} {'type': 'loss', 'content': 0.12911106646060944, 'timestamp': '2025-09-30 22:28:59.966479', 'step': 13771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.000008', 'step': 13771, 'epoch': 2} {'type': 'loss', 'content': 0.02885342761874199, 'timestamp': '2025-09-30 22:29:00.027994', 'step': 13772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:00.066872', 'step': 13772, 'epoch': 2} {'type': 'loss', 'content': 0.14368118345737457, 'timestamp': '2025-09-30 22:29:00.070927', 'step': 13773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:00.103611', 'step': 13773, 'epoch': 2} {'type': 'loss', 'content': 0.09655173122882843, 'timestamp': '2025-09-30 22:29:00.108385', 'step': 13774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.143196', 'step': 13774, 'epoch': 2} {'type': 'loss', 'content': 0.1561286598443985, 'timestamp': '2025-09-30 22:29:00.148954', 'step': 13775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:00.198766', 'step': 13775, 'epoch': 2} {'type': 'loss', 'content': 0.16160200536251068, 'timestamp': '2025-09-30 22:29:00.224596', 'step': 13776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.257082', 'step': 13776, 'epoch': 2} {'type': 'loss', 'content': 0.16647230088710785, 'timestamp': '2025-09-30 22:29:00.261933', 'step': 13777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:00.296395', 'step': 13777, 'epoch': 2} {'type': 'loss', 'content': 0.20407961308956146, 'timestamp': '2025-09-30 22:29:00.299574', 'step': 13778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.332772', 'step': 13778, 'epoch': 2} {'type': 'loss', 'content': 0.16059674322605133, 'timestamp': '2025-09-30 22:29:00.337180', 'step': 13779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:00.370824', 'step': 13779, 'epoch': 2} {'type': 'loss', 'content': 0.0744914561510086, 'timestamp': '2025-09-30 22:29:00.396894', 'step': 13780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.441525', 'step': 13780, 'epoch': 2} {'type': 'loss', 'content': 0.11448376625776291, 'timestamp': '2025-09-30 22:29:00.445830', 'step': 13781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:00.477320', 'step': 13781, 'epoch': 2} {'type': 'loss', 'content': 0.18063047528266907, 'timestamp': '2025-09-30 22:29:00.493852', 'step': 13782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:00.527398', 'step': 13782, 'epoch': 2} {'type': 'loss', 'content': 0.1005888357758522, 'timestamp': '2025-09-30 22:29:00.531387', 'step': 13783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.564957', 'step': 13783, 'epoch': 2} {'type': 'loss', 'content': 0.06848271191120148, 'timestamp': '2025-09-30 22:29:00.591070', 'step': 13784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:00.626515', 'step': 13784, 'epoch': 2} {'type': 'loss', 'content': 0.11231640726327896, 'timestamp': '2025-09-30 22:29:00.632142', 'step': 13785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:00.664544', 'step': 13785, 'epoch': 2} {'type': 'loss', 'content': 0.05167059972882271, 'timestamp': '2025-09-30 22:29:00.668535', 'step': 13786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:00.701228', 'step': 13786, 'epoch': 2} {'type': 'loss', 'content': 0.04709457606077194, 'timestamp': '2025-09-30 22:29:00.706833', 'step': 13787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.742686', 'step': 13787, 'epoch': 2} {'type': 'loss', 'content': 0.05325412377715111, 'timestamp': '2025-09-30 22:29:00.770414', 'step': 13788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:00.802768', 'step': 13788, 'epoch': 2} {'type': 'loss', 'content': 0.12052745372056961, 'timestamp': '2025-09-30 22:29:00.807600', 'step': 13789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.840277', 'step': 13789, 'epoch': 2} {'type': 'loss', 'content': 0.1503242403268814, 'timestamp': '2025-09-30 22:29:00.854422', 'step': 13790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.893775', 'step': 13790, 'epoch': 2} {'type': 'loss', 'content': 0.12165569514036179, 'timestamp': '2025-09-30 22:29:00.898114', 'step': 13791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:00.929439', 'step': 13791, 'epoch': 2} {'type': 'loss', 'content': 0.07453878223896027, 'timestamp': '2025-09-30 22:29:00.955866', 'step': 13792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:00.987060', 'step': 13792, 'epoch': 2} {'type': 'loss', 'content': 0.05647199973464012, 'timestamp': '2025-09-30 22:29:00.991599', 'step': 13793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.036175', 'step': 13793, 'epoch': 2} {'type': 'loss', 'content': 0.12166763097047806, 'timestamp': '2025-09-30 22:29:01.041092', 'step': 13794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:01.074876', 'step': 13794, 'epoch': 2} {'type': 'loss', 'content': 0.1153157502412796, 'timestamp': '2025-09-30 22:29:01.079102', 'step': 13795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:01.110273', 'step': 13795, 'epoch': 2} {'type': 'loss', 'content': 0.16470417380332947, 'timestamp': '2025-09-30 22:29:01.137398', 'step': 13796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:01.169231', 'step': 13796, 'epoch': 2} {'type': 'loss', 'content': 0.0699014663696289, 'timestamp': '2025-09-30 22:29:01.172786', 'step': 13797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:01.205435', 'step': 13797, 'epoch': 2} {'type': 'loss', 'content': 0.12625662982463837, 'timestamp': '2025-09-30 22:29:01.209950', 'step': 13798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:01.256722', 'step': 13798, 'epoch': 2} {'type': 'loss', 'content': 0.12089554220438004, 'timestamp': '2025-09-30 22:29:01.261048', 'step': 13799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:01.294393', 'step': 13799, 'epoch': 2} {'type': 'loss', 'content': 0.0875694677233696, 'timestamp': '2025-09-30 22:29:01.322525', 'step': 13800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:01.354495', 'step': 13800, 'epoch': 2} {'type': 'loss', 'content': 0.11665135622024536, 'timestamp': '2025-09-30 22:29:01.359042', 'step': 13801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:01.391173', 'step': 13801, 'epoch': 2} {'type': 'loss', 'content': 0.1300906538963318, 'timestamp': '2025-09-30 22:29:01.397135', 'step': 13802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.431547', 'step': 13802, 'epoch': 2} {'type': 'loss', 'content': 0.15714874863624573, 'timestamp': '2025-09-30 22:29:01.434935', 'step': 13803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.476546', 'step': 13803, 'epoch': 2} {'type': 'loss', 'content': 0.08663146942853928, 'timestamp': '2025-09-30 22:29:01.501432', 'step': 13804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:01.542813', 'step': 13804, 'epoch': 2} {'type': 'loss', 'content': 0.16983585059642792, 'timestamp': '2025-09-30 22:29:01.558450', 'step': 13805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.590938', 'step': 13805, 'epoch': 2} {'type': 'loss', 'content': 0.06843350827693939, 'timestamp': '2025-09-30 22:29:01.603340', 'step': 13806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.647702', 'step': 13806, 'epoch': 2} {'type': 'loss', 'content': 0.11391589045524597, 'timestamp': '2025-09-30 22:29:01.651351', 'step': 13807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:01.693595', 'step': 13807, 'epoch': 2} {'type': 'loss', 'content': 0.07163777947425842, 'timestamp': '2025-09-30 22:29:01.718803', 'step': 13808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:01.750070', 'step': 13808, 'epoch': 2} {'type': 'loss', 'content': 0.09030536562204361, 'timestamp': '2025-09-30 22:29:01.755694', 'step': 13809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.788627', 'step': 13809, 'epoch': 2} {'type': 'loss', 'content': 0.11786674708127975, 'timestamp': '2025-09-30 22:29:01.793939', 'step': 13810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:01.825528', 'step': 13810, 'epoch': 2} {'type': 'loss', 'content': 0.06563834846019745, 'timestamp': '2025-09-30 22:29:01.832101', 'step': 13811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:01.874829', 'step': 13811, 'epoch': 2} {'type': 'loss', 'content': 0.09083320945501328, 'timestamp': '2025-09-30 22:29:01.901202', 'step': 13812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:01.943944', 'step': 13812, 'epoch': 2} {'type': 'loss', 'content': 0.1111048087477684, 'timestamp': '2025-09-30 22:29:01.949782', 'step': 13813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:01.981919', 'step': 13813, 'epoch': 2} {'type': 'loss', 'content': 0.12040930241346359, 'timestamp': '2025-09-30 22:29:01.988258', 'step': 13814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:02.038399', 'step': 13814, 'epoch': 2} {'type': 'loss', 'content': 0.14204823970794678, 'timestamp': '2025-09-30 22:29:02.044820', 'step': 13815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:02.095851', 'step': 13815, 'epoch': 2} {'type': 'loss', 'content': 0.10366027802228928, 'timestamp': '2025-09-30 22:29:02.131074', 'step': 13816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:02.163552', 'step': 13816, 'epoch': 2} {'type': 'loss', 'content': 0.10063812881708145, 'timestamp': '2025-09-30 22:29:02.170584', 'step': 13817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:02.215773', 'step': 13817, 'epoch': 2} {'type': 'loss', 'content': 0.13579566776752472, 'timestamp': '2025-09-30 22:29:02.220778', 'step': 13818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:02.256031', 'step': 13818, 'epoch': 2} {'type': 'loss', 'content': 0.08005119860172272, 'timestamp': '2025-09-30 22:29:02.258665', 'step': 13819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:02.290590', 'step': 13819, 'epoch': 2} {'type': 'loss', 'content': 0.1974380612373352, 'timestamp': '2025-09-30 22:29:02.316477', 'step': 13820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:02.348645', 'step': 13820, 'epoch': 2} {'type': 'loss', 'content': 0.06874620914459229, 'timestamp': '2025-09-30 22:29:02.353335', 'step': 13821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:02.396041', 'step': 13821, 'epoch': 2} {'type': 'loss', 'content': 0.09297254681587219, 'timestamp': '2025-09-30 22:29:02.402882', 'step': 13822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:02.434428', 'step': 13822, 'epoch': 2} {'type': 'loss', 'content': 0.08401589095592499, 'timestamp': '2025-09-30 22:29:02.438276', 'step': 13823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:02.470008', 'step': 13823, 'epoch': 2} {'type': 'loss', 'content': 0.17636613547801971, 'timestamp': '2025-09-30 22:29:02.495932', 'step': 13824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:02.528476', 'step': 13824, 'epoch': 2} {'type': 'loss', 'content': 0.16810165345668793, 'timestamp': '2025-09-30 22:29:02.535117', 'step': 13825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:02.568631', 'step': 13825, 'epoch': 2} {'type': 'loss', 'content': 0.07246632128953934, 'timestamp': '2025-09-30 22:29:02.572617', 'step': 13826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:02.620189', 'step': 13826, 'epoch': 2} {'type': 'loss', 'content': 0.10261315107345581, 'timestamp': '2025-09-30 22:29:02.635237', 'step': 13827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:02.677012', 'step': 13827, 'epoch': 2} {'type': 'loss', 'content': 0.09893863648176193, 'timestamp': '2025-09-30 22:29:02.717025', 'step': 13828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:02.760475', 'step': 13828, 'epoch': 2} {'type': 'loss', 'content': 0.10857106745243073, 'timestamp': '2025-09-30 22:29:02.767977', 'step': 13829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:02.802364', 'step': 13829, 'epoch': 2} {'type': 'loss', 'content': 0.15128262341022491, 'timestamp': '2025-09-30 22:29:02.806794', 'step': 13830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:02.859172', 'step': 13830, 'epoch': 2} {'type': 'loss', 'content': 0.08757779002189636, 'timestamp': '2025-09-30 22:29:02.874553', 'step': 13831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:02.915937', 'step': 13831, 'epoch': 2} {'type': 'loss', 'content': 0.05715620890259743, 'timestamp': '2025-09-30 22:29:02.944696', 'step': 13832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:02.977492', 'step': 13832, 'epoch': 2} {'type': 'loss', 'content': 0.12696699798107147, 'timestamp': '2025-09-30 22:29:02.993169', 'step': 13833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.033931', 'step': 13833, 'epoch': 2} {'type': 'loss', 'content': 0.06380845606327057, 'timestamp': '2025-09-30 22:29:03.037676', 'step': 13834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.070229', 'step': 13834, 'epoch': 2} {'type': 'loss', 'content': 0.07571518421173096, 'timestamp': '2025-09-30 22:29:03.072965', 'step': 13835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.104214', 'step': 13835, 'epoch': 2} {'type': 'loss', 'content': 0.1240665465593338, 'timestamp': '2025-09-30 22:29:03.130113', 'step': 13836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:03.163043', 'step': 13836, 'epoch': 2} {'type': 'loss', 'content': 0.09123716503381729, 'timestamp': '2025-09-30 22:29:03.167722', 'step': 13837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:03.198976', 'step': 13837, 'epoch': 2} {'type': 'loss', 'content': 0.11309005320072174, 'timestamp': '2025-09-30 22:29:03.203826', 'step': 13838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:03.236038', 'step': 13838, 'epoch': 2} {'type': 'loss', 'content': 0.11807052046060562, 'timestamp': '2025-09-30 22:29:03.240759', 'step': 13839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.273084', 'step': 13839, 'epoch': 2} {'type': 'loss', 'content': 0.16102378070354462, 'timestamp': '2025-09-30 22:29:03.299119', 'step': 13840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:03.329689', 'step': 13840, 'epoch': 2} {'type': 'loss', 'content': 0.08112085610628128, 'timestamp': '2025-09-30 22:29:03.343852', 'step': 13841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:03.392167', 'step': 13841, 'epoch': 2} {'type': 'loss', 'content': 0.10118616372346878, 'timestamp': '2025-09-30 22:29:03.396013', 'step': 13842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.429501', 'step': 13842, 'epoch': 2} {'type': 'loss', 'content': 0.06427037715911865, 'timestamp': '2025-09-30 22:29:03.434929', 'step': 13843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:03.469010', 'step': 13843, 'epoch': 2} {'type': 'loss', 'content': 0.07486990094184875, 'timestamp': '2025-09-30 22:29:03.495562', 'step': 13844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:03.529335', 'step': 13844, 'epoch': 2} {'type': 'loss', 'content': 0.11542928963899612, 'timestamp': '2025-09-30 22:29:03.546734', 'step': 13845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:03.592814', 'step': 13845, 'epoch': 2} {'type': 'loss', 'content': 0.1609216332435608, 'timestamp': '2025-09-30 22:29:03.598503', 'step': 13846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:03.646685', 'step': 13846, 'epoch': 2} {'type': 'loss', 'content': 0.1441200226545334, 'timestamp': '2025-09-30 22:29:03.651023', 'step': 13847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:03.682015', 'step': 13847, 'epoch': 2} {'type': 'loss', 'content': 0.22252768278121948, 'timestamp': '2025-09-30 22:29:03.710022', 'step': 13848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.741865', 'step': 13848, 'epoch': 2} {'type': 'loss', 'content': 0.11470451205968857, 'timestamp': '2025-09-30 22:29:03.747054', 'step': 13849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:03.778960', 'step': 13849, 'epoch': 2} {'type': 'loss', 'content': 0.06742284446954727, 'timestamp': '2025-09-30 22:29:03.782953', 'step': 13850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.817165', 'step': 13850, 'epoch': 2} {'type': 'loss', 'content': 0.08712589740753174, 'timestamp': '2025-09-30 22:29:03.821730', 'step': 13851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.856984', 'step': 13851, 'epoch': 2} {'type': 'loss', 'content': 0.15811263024806976, 'timestamp': '2025-09-30 22:29:03.881282', 'step': 13852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:03.922350', 'step': 13852, 'epoch': 2} {'type': 'loss', 'content': 0.09176622331142426, 'timestamp': '2025-09-30 22:29:03.936245', 'step': 13853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:03.978614', 'step': 13853, 'epoch': 2} {'type': 'loss', 'content': 0.1594429463148117, 'timestamp': '2025-09-30 22:29:03.993540', 'step': 13854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:04.025672', 'step': 13854, 'epoch': 2} {'type': 'loss', 'content': 0.06649551540613174, 'timestamp': '2025-09-30 22:29:04.035687', 'step': 13855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:04.075876', 'step': 13855, 'epoch': 2} {'type': 'loss', 'content': 0.15481460094451904, 'timestamp': '2025-09-30 22:29:04.112830', 'step': 13856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:04.163697', 'step': 13856, 'epoch': 2} {'type': 'loss', 'content': 0.12945836782455444, 'timestamp': '2025-09-30 22:29:04.174026', 'step': 13857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.233548', 'step': 13857, 'epoch': 2} {'type': 'loss', 'content': 0.1210038959980011, 'timestamp': '2025-09-30 22:29:04.239491', 'step': 13858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:04.271361', 'step': 13858, 'epoch': 2} {'type': 'loss', 'content': 0.12152393907308578, 'timestamp': '2025-09-30 22:29:04.276308', 'step': 13859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.326931', 'step': 13859, 'epoch': 2} {'type': 'loss', 'content': 0.10698999464511871, 'timestamp': '2025-09-30 22:29:04.360471', 'step': 13860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.410902', 'step': 13860, 'epoch': 2} {'type': 'loss', 'content': 0.11278117448091507, 'timestamp': '2025-09-30 22:29:04.417130', 'step': 13861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.450465', 'step': 13861, 'epoch': 2} {'type': 'loss', 'content': 0.14481915533542633, 'timestamp': '2025-09-30 22:29:04.454526', 'step': 13862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.497401', 'step': 13862, 'epoch': 2} {'type': 'loss', 'content': 0.13071098923683167, 'timestamp': '2025-09-30 22:29:04.512625', 'step': 13863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:04.554619', 'step': 13863, 'epoch': 2} {'type': 'loss', 'content': 0.09463117271661758, 'timestamp': '2025-09-30 22:29:04.586620', 'step': 13864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:04.619040', 'step': 13864, 'epoch': 2} {'type': 'loss', 'content': 0.13382327556610107, 'timestamp': '2025-09-30 22:29:04.623159', 'step': 13865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.662642', 'step': 13865, 'epoch': 2} {'type': 'loss', 'content': 0.055124908685684204, 'timestamp': '2025-09-30 22:29:04.665730', 'step': 13866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:04.696889', 'step': 13866, 'epoch': 2} {'type': 'loss', 'content': 0.10606308281421661, 'timestamp': '2025-09-30 22:29:04.702977', 'step': 13867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:04.738533', 'step': 13867, 'epoch': 2} {'type': 'loss', 'content': 0.07892224192619324, 'timestamp': '2025-09-30 22:29:04.764022', 'step': 13868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.795597', 'step': 13868, 'epoch': 2} {'type': 'loss', 'content': 0.07531731575727463, 'timestamp': '2025-09-30 22:29:04.808771', 'step': 13869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.844479', 'step': 13869, 'epoch': 2} {'type': 'loss', 'content': 0.09530192613601685, 'timestamp': '2025-09-30 22:29:04.862335', 'step': 13870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:04.894661', 'step': 13870, 'epoch': 2} {'type': 'loss', 'content': 0.046507056802511215, 'timestamp': '2025-09-30 22:29:04.899121', 'step': 13871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:04.931870', 'step': 13871, 'epoch': 2} {'type': 'loss', 'content': 0.06901940703392029, 'timestamp': '2025-09-30 22:29:04.960171', 'step': 13872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:05.008314', 'step': 13872, 'epoch': 2} {'type': 'loss', 'content': 0.08291914314031601, 'timestamp': '2025-09-30 22:29:05.017998', 'step': 13873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:05.049823', 'step': 13873, 'epoch': 2} {'type': 'loss', 'content': 0.17821069061756134, 'timestamp': '2025-09-30 22:29:05.054119', 'step': 13874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:05.090502', 'step': 13874, 'epoch': 2} {'type': 'loss', 'content': 0.10173134505748749, 'timestamp': '2025-09-30 22:29:05.093121', 'step': 13875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.126168', 'step': 13875, 'epoch': 2} {'type': 'loss', 'content': 0.08310393989086151, 'timestamp': '2025-09-30 22:29:05.151261', 'step': 13876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.181714', 'step': 13876, 'epoch': 2} {'type': 'loss', 'content': 0.07453417778015137, 'timestamp': '2025-09-30 22:29:05.185945', 'step': 13877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.218412', 'step': 13877, 'epoch': 2} {'type': 'loss', 'content': 0.03800615295767784, 'timestamp': '2025-09-30 22:29:05.223108', 'step': 13878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.266156', 'step': 13878, 'epoch': 2} {'type': 'loss', 'content': 0.14057117700576782, 'timestamp': '2025-09-30 22:29:05.269313', 'step': 13879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.301088', 'step': 13879, 'epoch': 2} {'type': 'loss', 'content': 0.10771242529153824, 'timestamp': '2025-09-30 22:29:05.326510', 'step': 13880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.360683', 'step': 13880, 'epoch': 2} {'type': 'loss', 'content': 0.0820009633898735, 'timestamp': '2025-09-30 22:29:05.382214', 'step': 13881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.414990', 'step': 13881, 'epoch': 2} {'type': 'loss', 'content': 0.16097049415111542, 'timestamp': '2025-09-30 22:29:05.420562', 'step': 13882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.454125', 'step': 13882, 'epoch': 2} {'type': 'loss', 'content': 0.10494919866323471, 'timestamp': '2025-09-30 22:29:05.458581', 'step': 13883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:05.491481', 'step': 13883, 'epoch': 2} {'type': 'loss', 'content': 0.0801459401845932, 'timestamp': '2025-09-30 22:29:05.516715', 'step': 13884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.548418', 'step': 13884, 'epoch': 2} {'type': 'loss', 'content': 0.04367607831954956, 'timestamp': '2025-09-30 22:29:05.553858', 'step': 13885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.585219', 'step': 13885, 'epoch': 2} {'type': 'loss', 'content': 0.06965446472167969, 'timestamp': '2025-09-30 22:29:05.589606', 'step': 13886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.621090', 'step': 13886, 'epoch': 2} {'type': 'loss', 'content': 0.09687896072864532, 'timestamp': '2025-09-30 22:29:05.640115', 'step': 13887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:05.674167', 'step': 13887, 'epoch': 2} {'type': 'loss', 'content': 0.10555449873209, 'timestamp': '2025-09-30 22:29:05.699924', 'step': 13888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.732345', 'step': 13888, 'epoch': 2} {'type': 'loss', 'content': 0.08213204890489578, 'timestamp': '2025-09-30 22:29:05.737000', 'step': 13889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:05.770690', 'step': 13889, 'epoch': 2} {'type': 'loss', 'content': 0.1288248747587204, 'timestamp': '2025-09-30 22:29:05.775626', 'step': 13890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:05.817319', 'step': 13890, 'epoch': 2} {'type': 'loss', 'content': 0.0655483677983284, 'timestamp': '2025-09-30 22:29:05.820533', 'step': 13891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:05.861697', 'step': 13891, 'epoch': 2} {'type': 'loss', 'content': 0.124488465487957, 'timestamp': '2025-09-30 22:29:05.887345', 'step': 13892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.918452', 'step': 13892, 'epoch': 2} {'type': 'loss', 'content': 0.06497840583324432, 'timestamp': '2025-09-30 22:29:05.922050', 'step': 13893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:05.954577', 'step': 13893, 'epoch': 2} {'type': 'loss', 'content': 0.04762178286910057, 'timestamp': '2025-09-30 22:29:05.960177', 'step': 13894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:05.991579', 'step': 13894, 'epoch': 2} {'type': 'loss', 'content': 0.136217400431633, 'timestamp': '2025-09-30 22:29:05.995442', 'step': 13895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.028646', 'step': 13895, 'epoch': 2} {'type': 'loss', 'content': 0.1403060108423233, 'timestamp': '2025-09-30 22:29:06.053942', 'step': 13896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:06.103011', 'step': 13896, 'epoch': 2} {'type': 'loss', 'content': 0.12949396669864655, 'timestamp': '2025-09-30 22:29:06.107360', 'step': 13897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.140467', 'step': 13897, 'epoch': 2} {'type': 'loss', 'content': 0.06341090053319931, 'timestamp': '2025-09-30 22:29:06.144461', 'step': 13898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.180563', 'step': 13898, 'epoch': 2} {'type': 'loss', 'content': 0.09551484137773514, 'timestamp': '2025-09-30 22:29:06.185262', 'step': 13899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.217475', 'step': 13899, 'epoch': 2} {'type': 'loss', 'content': 0.045748814940452576, 'timestamp': '2025-09-30 22:29:06.243067', 'step': 13900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:06.292702', 'step': 13900, 'epoch': 2} {'type': 'loss', 'content': 0.11326376348733902, 'timestamp': '2025-09-30 22:29:06.298511', 'step': 13901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:06.336233', 'step': 13901, 'epoch': 2} {'type': 'loss', 'content': 0.10738585144281387, 'timestamp': '2025-09-30 22:29:06.340830', 'step': 13902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:06.373858', 'step': 13902, 'epoch': 2} {'type': 'loss', 'content': 0.0480104424059391, 'timestamp': '2025-09-30 22:29:06.378051', 'step': 13903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.410168', 'step': 13903, 'epoch': 2} {'type': 'loss', 'content': 0.12672139704227448, 'timestamp': '2025-09-30 22:29:06.438082', 'step': 13904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:06.490419', 'step': 13904, 'epoch': 2} {'type': 'loss', 'content': 0.12502245604991913, 'timestamp': '2025-09-30 22:29:06.495030', 'step': 13905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.526874', 'step': 13905, 'epoch': 2} {'type': 'loss', 'content': 0.14772526919841766, 'timestamp': '2025-09-30 22:29:06.533344', 'step': 13906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:06.582725', 'step': 13906, 'epoch': 2} {'type': 'loss', 'content': 0.1676081120967865, 'timestamp': '2025-09-30 22:29:06.600898', 'step': 13907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:06.646108', 'step': 13907, 'epoch': 2} {'type': 'loss', 'content': 0.12663614749908447, 'timestamp': '2025-09-30 22:29:06.671188', 'step': 13908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.703756', 'step': 13908, 'epoch': 2} {'type': 'loss', 'content': 0.06419172883033752, 'timestamp': '2025-09-30 22:29:06.708343', 'step': 13909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:06.740060', 'step': 13909, 'epoch': 2} {'type': 'loss', 'content': 0.06187410280108452, 'timestamp': '2025-09-30 22:29:06.749440', 'step': 13910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.784673', 'step': 13910, 'epoch': 2} {'type': 'loss', 'content': 0.03658226132392883, 'timestamp': '2025-09-30 22:29:06.790430', 'step': 13911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:06.838100', 'step': 13911, 'epoch': 2} {'type': 'loss', 'content': 0.06629828363656998, 'timestamp': '2025-09-30 22:29:06.863740', 'step': 13912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.896802', 'step': 13912, 'epoch': 2} {'type': 'loss', 'content': 0.09061062335968018, 'timestamp': '2025-09-30 22:29:06.911881', 'step': 13913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:06.955218', 'step': 13913, 'epoch': 2} {'type': 'loss', 'content': 0.11285027861595154, 'timestamp': '2025-09-30 22:29:06.960017', 'step': 13914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:07.002267', 'step': 13914, 'epoch': 2} {'type': 'loss', 'content': 0.08082211762666702, 'timestamp': '2025-09-30 22:29:07.007632', 'step': 13915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:07.042282', 'step': 13915, 'epoch': 2} {'type': 'loss', 'content': 0.11941681802272797, 'timestamp': '2025-09-30 22:29:07.078051', 'step': 13916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:07.110176', 'step': 13916, 'epoch': 2} {'type': 'loss', 'content': 0.0831160694360733, 'timestamp': '2025-09-30 22:29:07.116595', 'step': 13917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:07.148367', 'step': 13917, 'epoch': 2} {'type': 'loss', 'content': 0.062059927731752396, 'timestamp': '2025-09-30 22:29:07.152422', 'step': 13918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.184265', 'step': 13918, 'epoch': 2} {'type': 'loss', 'content': 0.13772465288639069, 'timestamp': '2025-09-30 22:29:07.188519', 'step': 13919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:07.221864', 'step': 13919, 'epoch': 2} {'type': 'loss', 'content': 0.06611169129610062, 'timestamp': '2025-09-30 22:29:07.259977', 'step': 13920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.291931', 'step': 13920, 'epoch': 2} {'type': 'loss', 'content': 0.09964936226606369, 'timestamp': '2025-09-30 22:29:07.295813', 'step': 13921, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:07.337331', 'step': 13921, 'epoch': 2} {'type': 'loss', 'content': 0.05752946436405182, 'timestamp': '2025-09-30 22:29:07.339680', 'step': 13922, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:07.373024', 'step': 13922, 'epoch': 2} {'type': 'loss', 'content': 0.15443706512451172, 'timestamp': '2025-09-30 22:29:07.389962', 'step': 13923, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:07.432597', 'step': 13923, 'epoch': 2} {'type': 'loss', 'content': 0.12829074263572693, 'timestamp': '2025-09-30 22:29:07.458207', 'step': 13924, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.505364', 'step': 13924, 'epoch': 2} {'type': 'loss', 'content': 0.12420208007097244, 'timestamp': '2025-09-30 22:29:07.518616', 'step': 13925, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.550789', 'step': 13925, 'epoch': 2} {'type': 'loss', 'content': 0.055117253214120865, 'timestamp': '2025-09-30 22:29:07.554500', 'step': 13926, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:07.587341', 'step': 13926, 'epoch': 2} {'type': 'loss', 'content': 0.06712237745523453, 'timestamp': '2025-09-30 22:29:07.593823', 'step': 13927, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.628369', 'step': 13927, 'epoch': 2} {'type': 'loss', 'content': 0.14586511254310608, 'timestamp': '2025-09-30 22:29:07.655119', 'step': 13928, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.685856', 'step': 13928, 'epoch': 2} {'type': 'loss', 'content': 0.07208757847547531, 'timestamp': '2025-09-30 22:29:07.690657', 'step': 13929, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.733032', 'step': 13929, 'epoch': 2} {'type': 'loss', 'content': 0.034326907247304916, 'timestamp': '2025-09-30 22:29:07.739413', 'step': 13930, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:07.773181', 'step': 13930, 'epoch': 2} {'type': 'loss', 'content': 0.1019517183303833, 'timestamp': '2025-09-30 22:29:07.789407', 'step': 13931, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.821399', 'step': 13931, 'epoch': 2} {'type': 'loss', 'content': 0.057235244661569595, 'timestamp': '2025-09-30 22:29:07.845974', 'step': 13932, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:07.877936', 'step': 13932, 'epoch': 2} {'type': 'loss', 'content': 0.12009600549936295, 'timestamp': '2025-09-30 22:29:07.881903', 'step': 13933, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:07.913950', 'step': 13933, 'epoch': 2} {'type': 'loss', 'content': 0.07192715257406235, 'timestamp': '2025-09-30 22:29:07.930675', 'step': 13934, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:07.977359', 'step': 13934, 'epoch': 2} {'type': 'loss', 'content': 0.12175681442022324, 'timestamp': '2025-09-30 22:29:07.995387', 'step': 13935, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:08.044386', 'step': 13935, 'epoch': 2} {'type': 'loss', 'content': 0.0515168122947216, 'timestamp': '2025-09-30 22:29:08.072064', 'step': 13936, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.105809', 'step': 13936, 'epoch': 2} {'type': 'loss', 'content': 0.09961821138858795, 'timestamp': '2025-09-30 22:29:08.110496', 'step': 13937, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.144289', 'step': 13937, 'epoch': 2} {'type': 'loss', 'content': 0.0758233293890953, 'timestamp': '2025-09-30 22:29:08.162656', 'step': 13938, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.205425', 'step': 13938, 'epoch': 2} {'type': 'loss', 'content': 0.14254717528820038, 'timestamp': '2025-09-30 22:29:08.210085', 'step': 13939, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.242450', 'step': 13939, 'epoch': 2} {'type': 'loss', 'content': 0.12463542819023132, 'timestamp': '2025-09-30 22:29:08.268888', 'step': 13940, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.317842', 'step': 13940, 'epoch': 2} {'type': 'loss', 'content': 0.1330818235874176, 'timestamp': '2025-09-30 22:29:08.323944', 'step': 13941, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:08.356835', 'step': 13941, 'epoch': 2} {'type': 'loss', 'content': 0.105390265583992, 'timestamp': '2025-09-30 22:29:08.363119', 'step': 13942, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:08.409862', 'step': 13942, 'epoch': 2} {'type': 'loss', 'content': 0.10376429557800293, 'timestamp': '2025-09-30 22:29:08.421800', 'step': 13943, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.464818', 'step': 13943, 'epoch': 2} {'type': 'loss', 'content': 0.09854195266962051, 'timestamp': '2025-09-30 22:29:08.491503', 'step': 13944, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:08.524388', 'step': 13944, 'epoch': 2} {'type': 'loss', 'content': 0.12365902215242386, 'timestamp': '2025-09-30 22:29:08.529672', 'step': 13945, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:08.574947', 'step': 13945, 'epoch': 2} {'type': 'loss', 'content': 0.08469723165035248, 'timestamp': '2025-09-30 22:29:08.579687', 'step': 13946, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.613373', 'step': 13946, 'epoch': 2} {'type': 'loss', 'content': 0.130880206823349, 'timestamp': '2025-09-30 22:29:08.618443', 'step': 13947, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:08.650992', 'step': 13947, 'epoch': 2} {'type': 'loss', 'content': 0.08690325915813446, 'timestamp': '2025-09-30 22:29:08.677879', 'step': 13948, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:08.711271', 'step': 13948, 'epoch': 2} {'type': 'loss', 'content': 0.09956618398427963, 'timestamp': '2025-09-30 22:29:08.715469', 'step': 13949, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:08.761782', 'step': 13949, 'epoch': 2} {'type': 'loss', 'content': 0.08726733177900314, 'timestamp': '2025-09-30 22:29:08.767287', 'step': 13950, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:08.799203', 'step': 13950, 'epoch': 2} {'type': 'loss', 'content': 0.0950205996632576, 'timestamp': '2025-09-30 22:29:08.802759', 'step': 13951, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:08.835156', 'step': 13951, 'epoch': 2} {'type': 'loss', 'content': 0.14642420411109924, 'timestamp': '2025-09-30 22:29:08.860514', 'step': 13952, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:08.892002', 'step': 13952, 'epoch': 2} {'type': 'loss', 'content': 0.12731590867042542, 'timestamp': '2025-09-30 22:29:08.895804', 'step': 13953, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:08.929004', 'step': 13953, 'epoch': 2} {'type': 'loss', 'content': 0.08228640258312225, 'timestamp': '2025-09-30 22:29:08.948715', 'step': 13954, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:08.980643', 'step': 13954, 'epoch': 2} {'type': 'loss', 'content': 0.09412137418985367, 'timestamp': '2025-09-30 22:29:08.992130', 'step': 13955, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:09.035884', 'step': 13955, 'epoch': 2} {'type': 'loss', 'content': 0.09777698665857315, 'timestamp': '2025-09-30 22:29:09.060771', 'step': 13956, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.104700', 'step': 13956, 'epoch': 2} {'type': 'loss', 'content': 0.07517767697572708, 'timestamp': '2025-09-30 22:29:09.120040', 'step': 13957, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:09.163965', 'step': 13957, 'epoch': 2} {'type': 'loss', 'content': 0.13951125741004944, 'timestamp': '2025-09-30 22:29:09.169028', 'step': 13958, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.212158', 'step': 13958, 'epoch': 2} {'type': 'loss', 'content': 0.11814584583044052, 'timestamp': '2025-09-30 22:29:09.220859', 'step': 13959, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:09.254662', 'step': 13959, 'epoch': 2} {'type': 'loss', 'content': 0.183680459856987, 'timestamp': '2025-09-30 22:29:09.280767', 'step': 13960, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:09.325149', 'step': 13960, 'epoch': 2} {'type': 'loss', 'content': 0.12539462745189667, 'timestamp': '2025-09-30 22:29:09.329298', 'step': 13961, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:09.362338', 'step': 13961, 'epoch': 2} {'type': 'loss', 'content': 0.12247447669506073, 'timestamp': '2025-09-30 22:29:09.382427', 'step': 13962, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.433161', 'step': 13962, 'epoch': 2} {'type': 'loss', 'content': 0.10355599969625473, 'timestamp': '2025-09-30 22:29:09.436915', 'step': 13963, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:09.474129', 'step': 13963, 'epoch': 2} {'type': 'loss', 'content': 0.1524634063243866, 'timestamp': '2025-09-30 22:29:09.515538', 'step': 13964, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.547039', 'step': 13964, 'epoch': 2} {'type': 'loss', 'content': 0.07231613248586655, 'timestamp': '2025-09-30 22:29:09.550580', 'step': 13965, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.580893', 'step': 13965, 'epoch': 2} {'type': 'loss', 'content': 0.06662943959236145, 'timestamp': '2025-09-30 22:29:09.585001', 'step': 13966, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:09.615367', 'step': 13966, 'epoch': 2} {'type': 'loss', 'content': 0.11625728756189346, 'timestamp': '2025-09-30 22:29:09.619706', 'step': 13967, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:09.652813', 'step': 13967, 'epoch': 2} {'type': 'loss', 'content': 0.11433013528585434, 'timestamp': '2025-09-30 22:29:09.678217', 'step': 13968, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:09.709190', 'step': 13968, 'epoch': 2} {'type': 'loss', 'content': 0.1298810839653015, 'timestamp': '2025-09-30 22:29:09.714003', 'step': 13969, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.746420', 'step': 13969, 'epoch': 2} {'type': 'loss', 'content': 0.07052723318338394, 'timestamp': '2025-09-30 22:29:09.751577', 'step': 13970, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.787836', 'step': 13970, 'epoch': 2} {'type': 'loss', 'content': 0.11985146254301071, 'timestamp': '2025-09-30 22:29:09.791542', 'step': 13971, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.833851', 'step': 13971, 'epoch': 2} {'type': 'loss', 'content': 0.11886342614889145, 'timestamp': '2025-09-30 22:29:09.871043', 'step': 13972, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:09.905185', 'step': 13972, 'epoch': 2} {'type': 'loss', 'content': 0.04612276703119278, 'timestamp': '2025-09-30 22:29:09.909164', 'step': 13973, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:09.941177', 'step': 13973, 'epoch': 2} {'type': 'loss', 'content': 0.12575656175613403, 'timestamp': '2025-09-30 22:29:09.947521', 'step': 13974, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:09.978602', 'step': 13974, 'epoch': 2} {'type': 'loss', 'content': 0.11719948053359985, 'timestamp': '2025-09-30 22:29:09.982923', 'step': 13975, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:10.014603', 'step': 13975, 'epoch': 2} {'type': 'loss', 'content': 0.1746179610490799, 'timestamp': '2025-09-30 22:29:10.044242', 'step': 13976, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:10.077890', 'step': 13976, 'epoch': 2} {'type': 'loss', 'content': 0.07910635322332382, 'timestamp': '2025-09-30 22:29:10.084409', 'step': 13977, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:10.117903', 'step': 13977, 'epoch': 2} {'type': 'loss', 'content': 0.09551599621772766, 'timestamp': '2025-09-30 22:29:10.122256', 'step': 13978, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:10.156239', 'step': 13978, 'epoch': 2} {'type': 'loss', 'content': 0.16486912965774536, 'timestamp': '2025-09-30 22:29:10.160068', 'step': 13979, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:10.201395', 'step': 13979, 'epoch': 2} {'type': 'loss', 'content': 0.0872901901602745, 'timestamp': '2025-09-30 22:29:10.226510', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:29:18.861429', 'step': 13980, 'epoch': 2} {'type': 'pplx', 'content': 8463.869710303976, 'timestamp': '2025-09-30 22:29:18.868050', 'step': 13980, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:18.898468', 'step': 13980, 'epoch': 2} {'type': 'loss', 'content': 0.06223267316818237, 'timestamp': '2025-09-30 22:29:18.905248', 'step': 13981, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:18.940736', 'step': 13981, 'epoch': 2} {'type': 'loss', 'content': 0.09273538738489151, 'timestamp': '2025-09-30 22:29:18.945325', 'step': 13982, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:18.983464', 'step': 13982, 'epoch': 2} {'type': 'loss', 'content': 0.11968409270048141, 'timestamp': '2025-09-30 22:29:18.989624', 'step': 13983, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.034720', 'step': 13983, 'epoch': 2} {'type': 'loss', 'content': 0.1335768699645996, 'timestamp': '2025-09-30 22:29:19.062656', 'step': 13984, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.100898', 'step': 13984, 'epoch': 2} {'type': 'loss', 'content': 0.09400027990341187, 'timestamp': '2025-09-30 22:29:19.104472', 'step': 13985, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.136034', 'step': 13985, 'epoch': 2} {'type': 'loss', 'content': 0.05710604041814804, 'timestamp': '2025-09-30 22:29:19.143657', 'step': 13986, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:19.181274', 'step': 13986, 'epoch': 2} {'type': 'loss', 'content': 0.15255892276763916, 'timestamp': '2025-09-30 22:29:19.185427', 'step': 13987, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.220898', 'step': 13987, 'epoch': 2} {'type': 'loss', 'content': 0.06727565824985504, 'timestamp': '2025-09-30 22:29:19.246995', 'step': 13988, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:19.289286', 'step': 13988, 'epoch': 2} {'type': 'loss', 'content': 0.10550407320261002, 'timestamp': '2025-09-30 22:29:19.293237', 'step': 13989, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.331648', 'step': 13989, 'epoch': 2} {'type': 'loss', 'content': 0.19597089290618896, 'timestamp': '2025-09-30 22:29:19.336183', 'step': 13990, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:19.377073', 'step': 13990, 'epoch': 2} {'type': 'loss', 'content': 0.1424761712551117, 'timestamp': '2025-09-30 22:29:19.381042', 'step': 13991, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:19.424894', 'step': 13991, 'epoch': 2} {'type': 'loss', 'content': 0.0881507620215416, 'timestamp': '2025-09-30 22:29:19.450072', 'step': 13992, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:19.495218', 'step': 13992, 'epoch': 2} {'type': 'loss', 'content': 0.09568950533866882, 'timestamp': '2025-09-30 22:29:19.501395', 'step': 13993, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.545026', 'step': 13993, 'epoch': 2} {'type': 'loss', 'content': 0.10235576331615448, 'timestamp': '2025-09-30 22:29:19.551253', 'step': 13994, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:19.586324', 'step': 13994, 'epoch': 2} {'type': 'loss', 'content': 0.017870675772428513, 'timestamp': '2025-09-30 22:29:19.595994', 'step': 13995, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.632418', 'step': 13995, 'epoch': 2} {'type': 'loss', 'content': 0.054890044033527374, 'timestamp': '2025-09-30 22:29:19.660070', 'step': 13996, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:19.697777', 'step': 13996, 'epoch': 2} {'type': 'loss', 'content': 0.014566098339855671, 'timestamp': '2025-09-30 22:29:19.701360', 'step': 13997, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.733838', 'step': 13997, 'epoch': 2} {'type': 'loss', 'content': 0.09556880593299866, 'timestamp': '2025-09-30 22:29:19.741332', 'step': 13998, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.779607', 'step': 13998, 'epoch': 2} {'type': 'loss', 'content': 0.11900409311056137, 'timestamp': '2025-09-30 22:29:19.784109', 'step': 13999, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:19.821729', 'step': 13999, 'epoch': 2} {'type': 'loss', 'content': 0.10188791900873184, 'timestamp': '2025-09-30 22:29:19.846893', 'step': 14000, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14000', 'timestamp': '2025-09-30 22:29:24.895425', 'step': 14000, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:24.933027', 'step': 14000, 'epoch': 2} {'type': 'loss', 'content': 0.09396570175886154, 'timestamp': '2025-09-30 22:29:24.945623', 'step': 14001, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:24.987627', 'step': 14001, 'epoch': 2} {'type': 'loss', 'content': 0.10577753931283951, 'timestamp': '2025-09-30 22:29:24.992666', 'step': 14002, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:25.024261', 'step': 14002, 'epoch': 2} {'type': 'loss', 'content': 0.11930326372385025, 'timestamp': '2025-09-30 22:29:25.028130', 'step': 14003, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:25.072263', 'step': 14003, 'epoch': 2} {'type': 'loss', 'content': 0.09000508487224579, 'timestamp': '2025-09-30 22:29:25.098609', 'step': 14004, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:25.132827', 'step': 14004, 'epoch': 2} {'type': 'loss', 'content': 0.05953872948884964, 'timestamp': '2025-09-30 22:29:25.149122', 'step': 14005, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:25.192473', 'step': 14005, 'epoch': 2} {'type': 'loss', 'content': 0.0864606648683548, 'timestamp': '2025-09-30 22:29:25.209480', 'step': 14006, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:25.253744', 'step': 14006, 'epoch': 2} {'type': 'loss', 'content': 0.040089771151542664, 'timestamp': '2025-09-30 22:29:25.259321', 'step': 14007, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:25.292064', 'step': 14007, 'epoch': 2} {'type': 'loss', 'content': 0.10836221277713776, 'timestamp': '2025-09-30 22:29:25.319145', 'step': 14008, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:25.351576', 'step': 14008, 'epoch': 2} {'type': 'loss', 'content': 0.08972284197807312, 'timestamp': '2025-09-30 22:29:25.357016', 'step': 14009, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:25.396588', 'step': 14009, 'epoch': 2} {'type': 'loss', 'content': 0.09344746172428131, 'timestamp': '2025-09-30 22:29:25.409599', 'step': 14010, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:25.442336', 'step': 14010, 'epoch': 2} {'type': 'loss', 'content': 0.13444243371486664, 'timestamp': '2025-09-30 22:29:25.447548', 'step': 14011, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:25.479780', 'step': 14011, 'epoch': 2} {'type': 'loss', 'content': 0.11686761677265167, 'timestamp': '2025-09-30 22:29:25.508448', 'step': 14012, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:25.539110', 'step': 14012, 'epoch': 2} {'type': 'loss', 'content': 0.11344725638628006, 'timestamp': '2025-09-30 22:29:25.552698', 'step': 14013, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:25.586799', 'step': 14013, 'epoch': 2} {'type': 'loss', 'content': 0.015647072345018387, 'timestamp': '2025-09-30 22:29:25.590416', 'step': 14014, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:25.623466', 'step': 14014, 'epoch': 2} {'type': 'loss', 'content': 0.12495293468236923, 'timestamp': '2025-09-30 22:29:25.629178', 'step': 14015, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:25.664689', 'step': 14015, 'epoch': 2} {'type': 'loss', 'content': 0.17682312428951263, 'timestamp': '2025-09-30 22:29:25.698059', 'step': 14016, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:25.730731', 'step': 14016, 'epoch': 2} {'type': 'loss', 'content': 0.08619590103626251, 'timestamp': '2025-09-30 22:29:25.734978', 'step': 14017, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:25.785265', 'step': 14017, 'epoch': 2} {'type': 'loss', 'content': 0.09579801559448242, 'timestamp': '2025-09-30 22:29:25.792992', 'step': 14018, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:25.826344', 'step': 14018, 'epoch': 2} {'type': 'loss', 'content': 0.11474308371543884, 'timestamp': '2025-09-30 22:29:25.829735', 'step': 14019, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:25.860338', 'step': 14019, 'epoch': 2} {'type': 'loss', 'content': 0.09905669093132019, 'timestamp': '2025-09-30 22:29:25.885928', 'step': 14020, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:25.917309', 'step': 14020, 'epoch': 2} {'type': 'loss', 'content': 0.0942232608795166, 'timestamp': '2025-09-30 22:29:25.922145', 'step': 14021, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:25.954234', 'step': 14021, 'epoch': 2} {'type': 'loss', 'content': 0.150776669383049, 'timestamp': '2025-09-30 22:29:25.965846', 'step': 14022, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.005093', 'step': 14022, 'epoch': 2} {'type': 'loss', 'content': 0.14474107325077057, 'timestamp': '2025-09-30 22:29:26.009997', 'step': 14023, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:26.041727', 'step': 14023, 'epoch': 2} {'type': 'loss', 'content': 0.16110043227672577, 'timestamp': '2025-09-30 22:29:26.068205', 'step': 14024, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:26.106774', 'step': 14024, 'epoch': 2} {'type': 'loss', 'content': 0.120622418820858, 'timestamp': '2025-09-30 22:29:26.116783', 'step': 14025, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.150426', 'step': 14025, 'epoch': 2} {'type': 'loss', 'content': 0.12358372658491135, 'timestamp': '2025-09-30 22:29:26.155539', 'step': 14026, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.187063', 'step': 14026, 'epoch': 2} {'type': 'loss', 'content': 0.1018257886171341, 'timestamp': '2025-09-30 22:29:26.197988', 'step': 14027, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.232849', 'step': 14027, 'epoch': 2} {'type': 'loss', 'content': 0.11245409399271011, 'timestamp': '2025-09-30 22:29:26.265625', 'step': 14028, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.297388', 'step': 14028, 'epoch': 2} {'type': 'loss', 'content': 0.10333913564682007, 'timestamp': '2025-09-30 22:29:26.302519', 'step': 14029, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:26.333162', 'step': 14029, 'epoch': 2} {'type': 'loss', 'content': 0.16154097020626068, 'timestamp': '2025-09-30 22:29:26.338806', 'step': 14030, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:26.386180', 'step': 14030, 'epoch': 2} {'type': 'loss', 'content': 0.10417689383029938, 'timestamp': '2025-09-30 22:29:26.392362', 'step': 14031, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:26.427545', 'step': 14031, 'epoch': 2} {'type': 'loss', 'content': 0.10328230261802673, 'timestamp': '2025-09-30 22:29:26.461561', 'step': 14032, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.503990', 'step': 14032, 'epoch': 2} {'type': 'loss', 'content': 0.06719104200601578, 'timestamp': '2025-09-30 22:29:26.518281', 'step': 14033, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:26.550866', 'step': 14033, 'epoch': 2} {'type': 'loss', 'content': 0.10604987293481827, 'timestamp': '2025-09-30 22:29:26.553859', 'step': 14034, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.586468', 'step': 14034, 'epoch': 2} {'type': 'loss', 'content': 0.1317361742258072, 'timestamp': '2025-09-30 22:29:26.597592', 'step': 14035, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:26.630548', 'step': 14035, 'epoch': 2} {'type': 'loss', 'content': 0.1108240857720375, 'timestamp': '2025-09-30 22:29:26.659866', 'step': 14036, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:26.700338', 'step': 14036, 'epoch': 2} {'type': 'loss', 'content': 0.08030733466148376, 'timestamp': '2025-09-30 22:29:26.704410', 'step': 14037, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.737159', 'step': 14037, 'epoch': 2} {'type': 'loss', 'content': 0.08543964475393295, 'timestamp': '2025-09-30 22:29:26.750511', 'step': 14038, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:26.789519', 'step': 14038, 'epoch': 2} {'type': 'loss', 'content': 0.07941858470439911, 'timestamp': '2025-09-30 22:29:26.804363', 'step': 14039, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:26.849207', 'step': 14039, 'epoch': 2} {'type': 'loss', 'content': 0.15098091959953308, 'timestamp': '2025-09-30 22:29:26.876084', 'step': 14040, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:26.908015', 'step': 14040, 'epoch': 2} {'type': 'loss', 'content': 0.1422397941350937, 'timestamp': '2025-09-30 22:29:26.911974', 'step': 14041, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:26.945148', 'step': 14041, 'epoch': 2} {'type': 'loss', 'content': 0.0727979764342308, 'timestamp': '2025-09-30 22:29:26.948832', 'step': 14042, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:26.980266', 'step': 14042, 'epoch': 2} {'type': 'loss', 'content': 0.08937841653823853, 'timestamp': '2025-09-30 22:29:26.987217', 'step': 14043, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:27.019081', 'step': 14043, 'epoch': 2} {'type': 'loss', 'content': 0.10850634425878525, 'timestamp': '2025-09-30 22:29:27.054729', 'step': 14044, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:27.086650', 'step': 14044, 'epoch': 2} {'type': 'loss', 'content': 0.16227005422115326, 'timestamp': '2025-09-30 22:29:27.098915', 'step': 14045, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:27.137637', 'step': 14045, 'epoch': 2} {'type': 'loss', 'content': 0.042007576674222946, 'timestamp': '2025-09-30 22:29:27.140633', 'step': 14046, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:27.173631', 'step': 14046, 'epoch': 2} {'type': 'loss', 'content': 0.09852893650531769, 'timestamp': '2025-09-30 22:29:27.187554', 'step': 14047, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:27.229425', 'step': 14047, 'epoch': 2} {'type': 'loss', 'content': 0.1387675404548645, 'timestamp': '2025-09-30 22:29:27.254603', 'step': 14048, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:27.287399', 'step': 14048, 'epoch': 2} {'type': 'loss', 'content': 0.105458103120327, 'timestamp': '2025-09-30 22:29:27.291449', 'step': 14049, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:27.331110', 'step': 14049, 'epoch': 2} {'type': 'loss', 'content': 0.09922129660844803, 'timestamp': '2025-09-30 22:29:27.335303', 'step': 14050, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:27.367065', 'step': 14050, 'epoch': 2} {'type': 'loss', 'content': 0.10195185244083405, 'timestamp': '2025-09-30 22:29:27.369941', 'step': 14051, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:27.404209', 'step': 14051, 'epoch': 2} {'type': 'loss', 'content': 0.18102005124092102, 'timestamp': '2025-09-30 22:29:27.433124', 'step': 14052, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:27.464884', 'step': 14052, 'epoch': 2} {'type': 'loss', 'content': 0.0726504921913147, 'timestamp': '2025-09-30 22:29:27.469462', 'step': 14053, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:27.504458', 'step': 14053, 'epoch': 2} {'type': 'loss', 'content': 0.11033644527196884, 'timestamp': '2025-09-30 22:29:27.510686', 'step': 14054, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:27.553504', 'step': 14054, 'epoch': 2} {'type': 'loss', 'content': 0.05748332291841507, 'timestamp': '2025-09-30 22:29:27.559249', 'step': 14055, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:27.592865', 'step': 14055, 'epoch': 2} {'type': 'loss', 'content': 0.07483775168657303, 'timestamp': '2025-09-30 22:29:27.618544', 'step': 14056, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:27.652877', 'step': 14056, 'epoch': 2} {'type': 'loss', 'content': 0.09753730893135071, 'timestamp': '2025-09-30 22:29:27.656834', 'step': 14057, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:27.688394', 'step': 14057, 'epoch': 2} {'type': 'loss', 'content': 0.02347320131957531, 'timestamp': '2025-09-30 22:29:27.699387', 'step': 14058, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:27.731900', 'step': 14058, 'epoch': 2} {'type': 'loss', 'content': 0.04635332524776459, 'timestamp': '2025-09-30 22:29:27.746926', 'step': 14059, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:27.788617', 'step': 14059, 'epoch': 2} {'type': 'loss', 'content': 0.07143067568540573, 'timestamp': '2025-09-30 22:29:27.825729', 'step': 14060, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:27.859389', 'step': 14060, 'epoch': 2} {'type': 'loss', 'content': 0.05866977199912071, 'timestamp': '2025-09-30 22:29:27.869651', 'step': 14061, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:27.903172', 'step': 14061, 'epoch': 2} {'type': 'loss', 'content': 0.09426513314247131, 'timestamp': '2025-09-30 22:29:27.915103', 'step': 14062, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:27.948561', 'step': 14062, 'epoch': 2} {'type': 'loss', 'content': 0.11999818682670593, 'timestamp': '2025-09-30 22:29:27.951484', 'step': 14063, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:27.984645', 'step': 14063, 'epoch': 2} {'type': 'loss', 'content': 0.16038905084133148, 'timestamp': '2025-09-30 22:29:28.011912', 'step': 14064, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:28.043407', 'step': 14064, 'epoch': 2} {'type': 'loss', 'content': 0.04018720984458923, 'timestamp': '2025-09-30 22:29:28.051797', 'step': 14065, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.085633', 'step': 14065, 'epoch': 2} {'type': 'loss', 'content': 0.0931382104754448, 'timestamp': '2025-09-30 22:29:28.090159', 'step': 14066, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:28.124481', 'step': 14066, 'epoch': 2} {'type': 'loss', 'content': 0.06981886178255081, 'timestamp': '2025-09-30 22:29:28.134179', 'step': 14067, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.168796', 'step': 14067, 'epoch': 2} {'type': 'loss', 'content': 0.07191790640354156, 'timestamp': '2025-09-30 22:29:28.195762', 'step': 14068, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:28.229237', 'step': 14068, 'epoch': 2} {'type': 'loss', 'content': 0.13066810369491577, 'timestamp': '2025-09-30 22:29:28.244086', 'step': 14069, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:28.277802', 'step': 14069, 'epoch': 2} {'type': 'loss', 'content': 0.14851894974708557, 'timestamp': '2025-09-30 22:29:28.287132', 'step': 14070, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:28.330286', 'step': 14070, 'epoch': 2} {'type': 'loss', 'content': 0.12417275458574295, 'timestamp': '2025-09-30 22:29:28.347297', 'step': 14071, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.386275', 'step': 14071, 'epoch': 2} {'type': 'loss', 'content': 0.10525815188884735, 'timestamp': '2025-09-30 22:29:28.411336', 'step': 14072, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.444105', 'step': 14072, 'epoch': 2} {'type': 'loss', 'content': 0.08498818427324295, 'timestamp': '2025-09-30 22:29:28.448561', 'step': 14073, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.481084', 'step': 14073, 'epoch': 2} {'type': 'loss', 'content': 0.13176904618740082, 'timestamp': '2025-09-30 22:29:28.486148', 'step': 14074, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.527461', 'step': 14074, 'epoch': 2} {'type': 'loss', 'content': 0.05053115263581276, 'timestamp': '2025-09-30 22:29:28.530164', 'step': 14075, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.562205', 'step': 14075, 'epoch': 2} {'type': 'loss', 'content': 0.14516699314117432, 'timestamp': '2025-09-30 22:29:28.595724', 'step': 14076, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:28.638740', 'step': 14076, 'epoch': 2} {'type': 'loss', 'content': 0.06825749576091766, 'timestamp': '2025-09-30 22:29:28.643558', 'step': 14077, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:28.677192', 'step': 14077, 'epoch': 2} {'type': 'loss', 'content': 0.0200041551142931, 'timestamp': '2025-09-30 22:29:28.681473', 'step': 14078, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.719343', 'step': 14078, 'epoch': 2} {'type': 'loss', 'content': 0.07147970050573349, 'timestamp': '2025-09-30 22:29:28.722131', 'step': 14079, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.761304', 'step': 14079, 'epoch': 2} {'type': 'loss', 'content': 0.09489055722951889, 'timestamp': '2025-09-30 22:29:28.787598', 'step': 14080, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:28.820456', 'step': 14080, 'epoch': 2} {'type': 'loss', 'content': 0.07654304802417755, 'timestamp': '2025-09-30 22:29:28.826394', 'step': 14081, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:28.859733', 'step': 14081, 'epoch': 2} {'type': 'loss', 'content': 0.10964594781398773, 'timestamp': '2025-09-30 22:29:28.864895', 'step': 14082, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.906993', 'step': 14082, 'epoch': 2} {'type': 'loss', 'content': 0.09513168036937714, 'timestamp': '2025-09-30 22:29:28.910705', 'step': 14083, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:28.952152', 'step': 14083, 'epoch': 2} {'type': 'loss', 'content': 0.06762970238924026, 'timestamp': '2025-09-30 22:29:28.988899', 'step': 14084, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:29.022390', 'step': 14084, 'epoch': 2} {'type': 'loss', 'content': 0.14820368587970734, 'timestamp': '2025-09-30 22:29:29.038140', 'step': 14085, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:29.077645', 'step': 14085, 'epoch': 2} {'type': 'loss', 'content': 0.0882127583026886, 'timestamp': '2025-09-30 22:29:29.081472', 'step': 14086, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:29.114154', 'step': 14086, 'epoch': 2} {'type': 'loss', 'content': 0.04475283622741699, 'timestamp': '2025-09-30 22:29:29.136475', 'step': 14087, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:29.171524', 'step': 14087, 'epoch': 2} {'type': 'loss', 'content': 0.0934852883219719, 'timestamp': '2025-09-30 22:29:29.208641', 'step': 14088, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:29.242165', 'step': 14088, 'epoch': 2} {'type': 'loss', 'content': 0.16502633690834045, 'timestamp': '2025-09-30 22:29:29.247256', 'step': 14089, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:29.304718', 'step': 14089, 'epoch': 2} {'type': 'loss', 'content': 0.0905427560210228, 'timestamp': '2025-09-30 22:29:29.309976', 'step': 14090, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:29.351988', 'step': 14090, 'epoch': 2} {'type': 'loss', 'content': 0.110835961997509, 'timestamp': '2025-09-30 22:29:29.367899', 'step': 14091, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:29.411778', 'step': 14091, 'epoch': 2} {'type': 'loss', 'content': 0.07811406999826431, 'timestamp': '2025-09-30 22:29:29.437349', 'step': 14092, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:29.468942', 'step': 14092, 'epoch': 2} {'type': 'loss', 'content': 0.08607377111911774, 'timestamp': '2025-09-30 22:29:29.488363', 'step': 14093, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:29.520722', 'step': 14093, 'epoch': 2} {'type': 'loss', 'content': 0.09949788451194763, 'timestamp': '2025-09-30 22:29:29.531370', 'step': 14094, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:29.570135', 'step': 14094, 'epoch': 2} {'type': 'loss', 'content': 0.1402709186077118, 'timestamp': '2025-09-30 22:29:29.587675', 'step': 14095, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:29.653147', 'step': 14095, 'epoch': 2} {'type': 'loss', 'content': 0.08454278111457825, 'timestamp': '2025-09-30 22:29:29.685285', 'step': 14096, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:29.738221', 'step': 14096, 'epoch': 2} {'type': 'loss', 'content': 0.020387139171361923, 'timestamp': '2025-09-30 22:29:29.743138', 'step': 14097, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:29.779455', 'step': 14097, 'epoch': 2} {'type': 'loss', 'content': 0.08704914152622223, 'timestamp': '2025-09-30 22:29:29.795611', 'step': 14098, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:29.848561', 'step': 14098, 'epoch': 2} {'type': 'loss', 'content': 0.09682567417621613, 'timestamp': '2025-09-30 22:29:29.855126', 'step': 14099, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:29.890581', 'step': 14099, 'epoch': 2} {'type': 'loss', 'content': 0.13539624214172363, 'timestamp': '2025-09-30 22:29:29.932298', 'step': 14100, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:29.972535', 'step': 14100, 'epoch': 2} {'type': 'loss', 'content': 0.15576177835464478, 'timestamp': '2025-09-30 22:29:29.986683', 'step': 14101, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:30.023331', 'step': 14101, 'epoch': 2} {'type': 'loss', 'content': 0.035936929285526276, 'timestamp': '2025-09-30 22:29:30.031844', 'step': 14102, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:30.068563', 'step': 14102, 'epoch': 2} {'type': 'loss', 'content': 0.1885777860879898, 'timestamp': '2025-09-30 22:29:30.081022', 'step': 14103, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:30.125333', 'step': 14103, 'epoch': 2} {'type': 'loss', 'content': 0.10182640701532364, 'timestamp': '2025-09-30 22:29:30.150075', 'step': 14104, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:30.202015', 'step': 14104, 'epoch': 2} {'type': 'loss', 'content': 0.04834494739770889, 'timestamp': '2025-09-30 22:29:30.210883', 'step': 14105, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:30.267413', 'step': 14105, 'epoch': 2} {'type': 'loss', 'content': 0.04549376294016838, 'timestamp': '2025-09-30 22:29:30.279808', 'step': 14106, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:30.324785', 'step': 14106, 'epoch': 2} {'type': 'loss', 'content': 0.025686297565698624, 'timestamp': '2025-09-30 22:29:30.338717', 'step': 14107, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:30.386050', 'step': 14107, 'epoch': 2} {'type': 'loss', 'content': 0.0565088652074337, 'timestamp': '2025-09-30 22:29:30.412806', 'step': 14108, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:30.452022', 'step': 14108, 'epoch': 2} {'type': 'loss', 'content': 0.1734900176525116, 'timestamp': '2025-09-30 22:29:30.456013', 'step': 14109, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:30.488106', 'step': 14109, 'epoch': 2} {'type': 'loss', 'content': 0.09152448177337646, 'timestamp': '2025-09-30 22:29:30.497290', 'step': 14110, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:30.539629', 'step': 14110, 'epoch': 2} {'type': 'loss', 'content': 0.06406763941049576, 'timestamp': '2025-09-30 22:29:30.550678', 'step': 14111, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:30.590344', 'step': 14111, 'epoch': 2} {'type': 'loss', 'content': 0.12127397954463959, 'timestamp': '2025-09-30 22:29:30.632487', 'step': 14112, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:30.667320', 'step': 14112, 'epoch': 2} {'type': 'loss', 'content': 0.09124702960252762, 'timestamp': '2025-09-30 22:29:30.693224', 'step': 14113, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:30.727054', 'step': 14113, 'epoch': 2} {'type': 'loss', 'content': 0.08784358948469162, 'timestamp': '2025-09-30 22:29:30.731781', 'step': 14114, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:30.773235', 'step': 14114, 'epoch': 2} {'type': 'loss', 'content': 0.14629864692687988, 'timestamp': '2025-09-30 22:29:30.788106', 'step': 14115, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:30.823133', 'step': 14115, 'epoch': 2} {'type': 'loss', 'content': 0.07958068698644638, 'timestamp': '2025-09-30 22:29:30.850499', 'step': 14116, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:30.893912', 'step': 14116, 'epoch': 2} {'type': 'loss', 'content': 0.07350222766399384, 'timestamp': '2025-09-30 22:29:30.905839', 'step': 14117, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:30.958165', 'step': 14117, 'epoch': 2} {'type': 'loss', 'content': 0.15769663453102112, 'timestamp': '2025-09-30 22:29:30.973739', 'step': 14118, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:31.007986', 'step': 14118, 'epoch': 2} {'type': 'loss', 'content': 0.04030657559633255, 'timestamp': '2025-09-30 22:29:31.017976', 'step': 14119, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.062749', 'step': 14119, 'epoch': 2} {'type': 'loss', 'content': 0.11574143916368484, 'timestamp': '2025-09-30 22:29:31.091745', 'step': 14120, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:31.126261', 'step': 14120, 'epoch': 2} {'type': 'loss', 'content': 0.027819309383630753, 'timestamp': '2025-09-30 22:29:31.130961', 'step': 14121, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:31.164606', 'step': 14121, 'epoch': 2} {'type': 'loss', 'content': 0.05156748369336128, 'timestamp': '2025-09-30 22:29:31.170220', 'step': 14122, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:31.203806', 'step': 14122, 'epoch': 2} {'type': 'loss', 'content': 0.052445296198129654, 'timestamp': '2025-09-30 22:29:31.209885', 'step': 14123, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.244627', 'step': 14123, 'epoch': 2} {'type': 'loss', 'content': 0.08987050503492355, 'timestamp': '2025-09-30 22:29:31.270781', 'step': 14124, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:31.303162', 'step': 14124, 'epoch': 2} {'type': 'loss', 'content': 0.041772469878196716, 'timestamp': '2025-09-30 22:29:31.309183', 'step': 14125, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.346537', 'step': 14125, 'epoch': 2} {'type': 'loss', 'content': 0.0477585569024086, 'timestamp': '2025-09-30 22:29:31.350241', 'step': 14126, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.382404', 'step': 14126, 'epoch': 2} {'type': 'loss', 'content': 0.24712571501731873, 'timestamp': '2025-09-30 22:29:31.387336', 'step': 14127, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.423809', 'step': 14127, 'epoch': 2} {'type': 'loss', 'content': 0.11474296450614929, 'timestamp': '2025-09-30 22:29:31.448502', 'step': 14128, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.490537', 'step': 14128, 'epoch': 2} {'type': 'loss', 'content': 0.08601507544517517, 'timestamp': '2025-09-30 22:29:31.493926', 'step': 14129, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.525854', 'step': 14129, 'epoch': 2} {'type': 'loss', 'content': 0.08682315051555634, 'timestamp': '2025-09-30 22:29:31.535215', 'step': 14130, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:31.567485', 'step': 14130, 'epoch': 2} {'type': 'loss', 'content': 0.01184337493032217, 'timestamp': '2025-09-30 22:29:31.577213', 'step': 14131, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.611562', 'step': 14131, 'epoch': 2} {'type': 'loss', 'content': 0.05788787081837654, 'timestamp': '2025-09-30 22:29:31.637555', 'step': 14132, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.675496', 'step': 14132, 'epoch': 2} {'type': 'loss', 'content': 0.09432575106620789, 'timestamp': '2025-09-30 22:29:31.679467', 'step': 14133, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.723045', 'step': 14133, 'epoch': 2} {'type': 'loss', 'content': 0.14735546708106995, 'timestamp': '2025-09-30 22:29:31.728415', 'step': 14134, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.762841', 'step': 14134, 'epoch': 2} {'type': 'loss', 'content': 0.15102291107177734, 'timestamp': '2025-09-30 22:29:31.780109', 'step': 14135, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:31.812036', 'step': 14135, 'epoch': 2} {'type': 'loss', 'content': 0.06741917878389359, 'timestamp': '2025-09-30 22:29:31.836389', 'step': 14136, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:31.872547', 'step': 14136, 'epoch': 2} {'type': 'loss', 'content': 0.05108095705509186, 'timestamp': '2025-09-30 22:29:31.876799', 'step': 14137, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.909914', 'step': 14137, 'epoch': 2} {'type': 'loss', 'content': 0.10423164814710617, 'timestamp': '2025-09-30 22:29:31.913743', 'step': 14138, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:31.961702', 'step': 14138, 'epoch': 2} {'type': 'loss', 'content': 0.1784469038248062, 'timestamp': '2025-09-30 22:29:31.966917', 'step': 14139, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:31.998482', 'step': 14139, 'epoch': 2} {'type': 'loss', 'content': 0.09186967462301254, 'timestamp': '2025-09-30 22:29:32.027273', 'step': 14140, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:32.060859', 'step': 14140, 'epoch': 2} {'type': 'loss', 'content': 0.042357493191957474, 'timestamp': '2025-09-30 22:29:32.069920', 'step': 14141, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.102604', 'step': 14141, 'epoch': 2} {'type': 'loss', 'content': 0.09127456694841385, 'timestamp': '2025-09-30 22:29:32.106252', 'step': 14142, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:32.140161', 'step': 14142, 'epoch': 2} {'type': 'loss', 'content': 0.12412691116333008, 'timestamp': '2025-09-30 22:29:32.145607', 'step': 14143, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:32.185706', 'step': 14143, 'epoch': 2} {'type': 'loss', 'content': 0.1169515997171402, 'timestamp': '2025-09-30 22:29:32.220900', 'step': 14144, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:32.261125', 'step': 14144, 'epoch': 2} {'type': 'loss', 'content': 0.13753660023212433, 'timestamp': '2025-09-30 22:29:32.278664', 'step': 14145, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.316453', 'step': 14145, 'epoch': 2} {'type': 'loss', 'content': 0.07829747349023819, 'timestamp': '2025-09-30 22:29:32.331022', 'step': 14146, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.372521', 'step': 14146, 'epoch': 2} {'type': 'loss', 'content': 0.06834633648395538, 'timestamp': '2025-09-30 22:29:32.375664', 'step': 14147, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.420758', 'step': 14147, 'epoch': 2} {'type': 'loss', 'content': 0.10502223670482635, 'timestamp': '2025-09-30 22:29:32.448193', 'step': 14148, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.483357', 'step': 14148, 'epoch': 2} {'type': 'loss', 'content': 0.16311436891555786, 'timestamp': '2025-09-30 22:29:32.487437', 'step': 14149, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:32.520009', 'step': 14149, 'epoch': 2} {'type': 'loss', 'content': 0.17235979437828064, 'timestamp': '2025-09-30 22:29:32.532888', 'step': 14150, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:32.574322', 'step': 14150, 'epoch': 2} {'type': 'loss', 'content': 0.1165541261434555, 'timestamp': '2025-09-30 22:29:32.577066', 'step': 14151, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.607785', 'step': 14151, 'epoch': 2} {'type': 'loss', 'content': 0.057787470519542694, 'timestamp': '2025-09-30 22:29:32.632109', 'step': 14152, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:32.670233', 'step': 14152, 'epoch': 2} {'type': 'loss', 'content': 0.10844792425632477, 'timestamp': '2025-09-30 22:29:32.676082', 'step': 14153, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:32.709870', 'step': 14153, 'epoch': 2} {'type': 'loss', 'content': 0.0783974900841713, 'timestamp': '2025-09-30 22:29:32.714478', 'step': 14154, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:32.750322', 'step': 14154, 'epoch': 2} {'type': 'loss', 'content': 0.08933930099010468, 'timestamp': '2025-09-30 22:29:32.760273', 'step': 14155, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:32.794444', 'step': 14155, 'epoch': 2} {'type': 'loss', 'content': 0.09002282470464706, 'timestamp': '2025-09-30 22:29:32.820454', 'step': 14156, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:32.857875', 'step': 14156, 'epoch': 2} {'type': 'loss', 'content': 0.13128308951854706, 'timestamp': '2025-09-30 22:29:32.861894', 'step': 14157, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:32.897027', 'step': 14157, 'epoch': 2} {'type': 'loss', 'content': 0.12609411776065826, 'timestamp': '2025-09-30 22:29:32.917019', 'step': 14158, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:32.961136', 'step': 14158, 'epoch': 2} {'type': 'loss', 'content': 0.11237230896949768, 'timestamp': '2025-09-30 22:29:32.964362', 'step': 14159, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:33.006508', 'step': 14159, 'epoch': 2} {'type': 'loss', 'content': 0.06970295310020447, 'timestamp': '2025-09-30 22:29:33.030610', 'step': 14160, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:33.074774', 'step': 14160, 'epoch': 2} {'type': 'loss', 'content': 0.14475670456886292, 'timestamp': '2025-09-30 22:29:33.079244', 'step': 14161, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:33.111407', 'step': 14161, 'epoch': 2} {'type': 'loss', 'content': 0.11642339080572128, 'timestamp': '2025-09-30 22:29:33.115280', 'step': 14162, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:33.158781', 'step': 14162, 'epoch': 2} {'type': 'loss', 'content': 0.08051416277885437, 'timestamp': '2025-09-30 22:29:33.170117', 'step': 14163, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:33.202102', 'step': 14163, 'epoch': 2} {'type': 'loss', 'content': 0.10056670755147934, 'timestamp': '2025-09-30 22:29:33.238485', 'step': 14164, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:33.272989', 'step': 14164, 'epoch': 2} {'type': 'loss', 'content': 0.15184757113456726, 'timestamp': '2025-09-30 22:29:33.280221', 'step': 14165, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:33.317134', 'step': 14165, 'epoch': 2} {'type': 'loss', 'content': 0.09889983385801315, 'timestamp': '2025-09-30 22:29:33.326622', 'step': 14166, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:33.361819', 'step': 14166, 'epoch': 2} {'type': 'loss', 'content': 0.10214699804782867, 'timestamp': '2025-09-30 22:29:33.364917', 'step': 14167, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:33.398164', 'step': 14167, 'epoch': 2} {'type': 'loss', 'content': 0.09554802626371384, 'timestamp': '2025-09-30 22:29:33.433839', 'step': 14168, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:33.464564', 'step': 14168, 'epoch': 2} {'type': 'loss', 'content': 0.03251834958791733, 'timestamp': '2025-09-30 22:29:33.470748', 'step': 14169, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:33.506103', 'step': 14169, 'epoch': 2} {'type': 'loss', 'content': 0.12023560702800751, 'timestamp': '2025-09-30 22:29:33.511540', 'step': 14170, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:33.544027', 'step': 14170, 'epoch': 2} {'type': 'loss', 'content': 0.1959267556667328, 'timestamp': '2025-09-30 22:29:33.548812', 'step': 14171, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:33.593231', 'step': 14171, 'epoch': 2} {'type': 'loss', 'content': 0.09062402695417404, 'timestamp': '2025-09-30 22:29:33.629377', 'step': 14172, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:33.669675', 'step': 14172, 'epoch': 2} {'type': 'loss', 'content': 0.07322287559509277, 'timestamp': '2025-09-30 22:29:33.674603', 'step': 14173, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:33.711332', 'step': 14173, 'epoch': 2} {'type': 'loss', 'content': 0.18016298115253448, 'timestamp': '2025-09-30 22:29:33.716079', 'step': 14174, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:33.760845', 'step': 14174, 'epoch': 2} {'type': 'loss', 'content': 0.08513349294662476, 'timestamp': '2025-09-30 22:29:33.770397', 'step': 14175, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:33.802977', 'step': 14175, 'epoch': 2} {'type': 'loss', 'content': 0.1296088695526123, 'timestamp': '2025-09-30 22:29:33.828600', 'step': 14176, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:29:33.865549', 'step': 14176, 'epoch': 2} {'type': 'loss', 'content': 0.08745584636926651, 'timestamp': '2025-09-30 22:29:33.878788', 'step': 14177, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:33.919798', 'step': 14177, 'epoch': 2} {'type': 'loss', 'content': 0.1124294251203537, 'timestamp': '2025-09-30 22:29:33.923820', 'step': 14178, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:33.959601', 'step': 14178, 'epoch': 2} {'type': 'loss', 'content': 0.12017830461263657, 'timestamp': '2025-09-30 22:29:33.963749', 'step': 14179, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:33.997318', 'step': 14179, 'epoch': 2} {'type': 'loss', 'content': 0.11008734256029129, 'timestamp': '2025-09-30 22:29:34.023760', 'step': 14180, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.056940', 'step': 14180, 'epoch': 2} {'type': 'loss', 'content': 0.09345440566539764, 'timestamp': '2025-09-30 22:29:34.065389', 'step': 14181, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:34.095962', 'step': 14181, 'epoch': 2} {'type': 'loss', 'content': 0.20353950560092926, 'timestamp': '2025-09-30 22:29:34.099842', 'step': 14182, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.134122', 'step': 14182, 'epoch': 2} {'type': 'loss', 'content': 0.13666664063930511, 'timestamp': '2025-09-30 22:29:34.139139', 'step': 14183, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.179543', 'step': 14183, 'epoch': 2} {'type': 'loss', 'content': 0.08006040006875992, 'timestamp': '2025-09-30 22:29:34.205415', 'step': 14184, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.253560', 'step': 14184, 'epoch': 2} {'type': 'loss', 'content': 0.049886059015989304, 'timestamp': '2025-09-30 22:29:34.269762', 'step': 14185, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.303704', 'step': 14185, 'epoch': 2} {'type': 'loss', 'content': 0.04255973547697067, 'timestamp': '2025-09-30 22:29:34.317699', 'step': 14186, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:34.367257', 'step': 14186, 'epoch': 2} {'type': 'loss', 'content': 0.09124313294887543, 'timestamp': '2025-09-30 22:29:34.372016', 'step': 14187, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.409025', 'step': 14187, 'epoch': 2} {'type': 'loss', 'content': 0.08065548539161682, 'timestamp': '2025-09-30 22:29:34.445018', 'step': 14188, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:34.480983', 'step': 14188, 'epoch': 2} {'type': 'loss', 'content': 0.10143895447254181, 'timestamp': '2025-09-30 22:29:34.490309', 'step': 14189, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.521736', 'step': 14189, 'epoch': 2} {'type': 'loss', 'content': 0.16748641431331635, 'timestamp': '2025-09-30 22:29:34.527014', 'step': 14190, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:34.558678', 'step': 14190, 'epoch': 2} {'type': 'loss', 'content': 0.1588953137397766, 'timestamp': '2025-09-30 22:29:34.562773', 'step': 14191, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.593338', 'step': 14191, 'epoch': 2} {'type': 'loss', 'content': 0.12339363992214203, 'timestamp': '2025-09-30 22:29:34.618975', 'step': 14192, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.656647', 'step': 14192, 'epoch': 2} {'type': 'loss', 'content': 0.052819158881902695, 'timestamp': '2025-09-30 22:29:34.660529', 'step': 14193, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.694694', 'step': 14193, 'epoch': 2} {'type': 'loss', 'content': 0.09530560672283173, 'timestamp': '2025-09-30 22:29:34.698319', 'step': 14194, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.730849', 'step': 14194, 'epoch': 2} {'type': 'loss', 'content': 0.041571252048015594, 'timestamp': '2025-09-30 22:29:34.734289', 'step': 14195, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.765541', 'step': 14195, 'epoch': 2} {'type': 'loss', 'content': 0.09267499297857285, 'timestamp': '2025-09-30 22:29:34.791871', 'step': 14196, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.822324', 'step': 14196, 'epoch': 2} {'type': 'loss', 'content': 0.13381075859069824, 'timestamp': '2025-09-30 22:29:34.827315', 'step': 14197, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:34.861192', 'step': 14197, 'epoch': 2} {'type': 'loss', 'content': 0.14814811944961548, 'timestamp': '2025-09-30 22:29:34.876602', 'step': 14198, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:34.911273', 'step': 14198, 'epoch': 2} {'type': 'loss', 'content': 0.09598813205957413, 'timestamp': '2025-09-30 22:29:34.923034', 'step': 14199, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:34.960739', 'step': 14199, 'epoch': 2} {'type': 'loss', 'content': 0.10985760390758514, 'timestamp': '2025-09-30 22:29:34.987269', 'step': 14200, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.018672', 'step': 14200, 'epoch': 2} {'type': 'loss', 'content': 0.11694835126399994, 'timestamp': '2025-09-30 22:29:35.022884', 'step': 14201, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:35.058820', 'step': 14201, 'epoch': 2} {'type': 'loss', 'content': 0.049074724316596985, 'timestamp': '2025-09-30 22:29:35.070609', 'step': 14202, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.102519', 'step': 14202, 'epoch': 2} {'type': 'loss', 'content': 0.14533397555351257, 'timestamp': '2025-09-30 22:29:35.114520', 'step': 14203, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.147337', 'step': 14203, 'epoch': 2} {'type': 'loss', 'content': 0.08567371964454651, 'timestamp': '2025-09-30 22:29:35.182008', 'step': 14204, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.215775', 'step': 14204, 'epoch': 2} {'type': 'loss', 'content': 0.08354266732931137, 'timestamp': '2025-09-30 22:29:35.229284', 'step': 14205, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.273648', 'step': 14205, 'epoch': 2} {'type': 'loss', 'content': 0.10813306272029877, 'timestamp': '2025-09-30 22:29:35.285779', 'step': 14206, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.327474', 'step': 14206, 'epoch': 2} {'type': 'loss', 'content': 0.15395863354206085, 'timestamp': '2025-09-30 22:29:35.334347', 'step': 14207, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.367182', 'step': 14207, 'epoch': 2} {'type': 'loss', 'content': 0.1401103287935257, 'timestamp': '2025-09-30 22:29:35.392292', 'step': 14208, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.425636', 'step': 14208, 'epoch': 2} {'type': 'loss', 'content': 0.050651270896196365, 'timestamp': '2025-09-30 22:29:35.429308', 'step': 14209, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.461020', 'step': 14209, 'epoch': 2} {'type': 'loss', 'content': 0.17296338081359863, 'timestamp': '2025-09-30 22:29:35.474762', 'step': 14210, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.508717', 'step': 14210, 'epoch': 2} {'type': 'loss', 'content': 0.04412844032049179, 'timestamp': '2025-09-30 22:29:35.512940', 'step': 14211, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.558737', 'step': 14211, 'epoch': 2} {'type': 'loss', 'content': 0.1094912439584732, 'timestamp': '2025-09-30 22:29:35.586417', 'step': 14212, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.619739', 'step': 14212, 'epoch': 2} {'type': 'loss', 'content': 0.12646478414535522, 'timestamp': '2025-09-30 22:29:35.639506', 'step': 14213, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:35.680597', 'step': 14213, 'epoch': 2} {'type': 'loss', 'content': 0.06878861784934998, 'timestamp': '2025-09-30 22:29:35.696885', 'step': 14214, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:35.749215', 'step': 14214, 'epoch': 2} {'type': 'loss', 'content': 0.10528755187988281, 'timestamp': '2025-09-30 22:29:35.765257', 'step': 14215, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:35.798547', 'step': 14215, 'epoch': 2} {'type': 'loss', 'content': 0.11630041897296906, 'timestamp': '2025-09-30 22:29:35.826778', 'step': 14216, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:35.862910', 'step': 14216, 'epoch': 2} {'type': 'loss', 'content': 0.09297561645507812, 'timestamp': '2025-09-30 22:29:35.869170', 'step': 14217, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:35.917062', 'step': 14217, 'epoch': 2} {'type': 'loss', 'content': 0.03989045321941376, 'timestamp': '2025-09-30 22:29:35.922558', 'step': 14218, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:35.963399', 'step': 14218, 'epoch': 2} {'type': 'loss', 'content': 0.08926972001791, 'timestamp': '2025-09-30 22:29:35.970885', 'step': 14219, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:36.014578', 'step': 14219, 'epoch': 2} {'type': 'loss', 'content': 0.05872078612446785, 'timestamp': '2025-09-30 22:29:36.042122', 'step': 14220, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.083861', 'step': 14220, 'epoch': 2} {'type': 'loss', 'content': 0.1376342475414276, 'timestamp': '2025-09-30 22:29:36.088239', 'step': 14221, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:36.121017', 'step': 14221, 'epoch': 2} {'type': 'loss', 'content': 0.12881721556186676, 'timestamp': '2025-09-30 22:29:36.125682', 'step': 14222, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:36.175742', 'step': 14222, 'epoch': 2} {'type': 'loss', 'content': 0.0749928206205368, 'timestamp': '2025-09-30 22:29:36.181530', 'step': 14223, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.214618', 'step': 14223, 'epoch': 2} {'type': 'loss', 'content': 0.07891803979873657, 'timestamp': '2025-09-30 22:29:36.240300', 'step': 14224, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.273509', 'step': 14224, 'epoch': 2} {'type': 'loss', 'content': 0.12371969223022461, 'timestamp': '2025-09-30 22:29:36.279433', 'step': 14225, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:36.313351', 'step': 14225, 'epoch': 2} {'type': 'loss', 'content': 0.110855333507061, 'timestamp': '2025-09-30 22:29:36.318010', 'step': 14226, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.359599', 'step': 14226, 'epoch': 2} {'type': 'loss', 'content': 0.11987023800611496, 'timestamp': '2025-09-30 22:29:36.367069', 'step': 14227, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.402002', 'step': 14227, 'epoch': 2} {'type': 'loss', 'content': 0.16107629239559174, 'timestamp': '2025-09-30 22:29:36.427997', 'step': 14228, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.461686', 'step': 14228, 'epoch': 2} {'type': 'loss', 'content': 0.031088100746273994, 'timestamp': '2025-09-30 22:29:36.464581', 'step': 14229, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.497890', 'step': 14229, 'epoch': 2} {'type': 'loss', 'content': 0.17194142937660217, 'timestamp': '2025-09-30 22:29:36.516351', 'step': 14230, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:36.559388', 'step': 14230, 'epoch': 2} {'type': 'loss', 'content': 0.0747588500380516, 'timestamp': '2025-09-30 22:29:36.576720', 'step': 14231, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:36.620965', 'step': 14231, 'epoch': 2} {'type': 'loss', 'content': 0.13446077704429626, 'timestamp': '2025-09-30 22:29:36.651263', 'step': 14232, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:36.700287', 'step': 14232, 'epoch': 2} {'type': 'loss', 'content': 0.0831066146492958, 'timestamp': '2025-09-30 22:29:36.705750', 'step': 14233, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:36.751585', 'step': 14233, 'epoch': 2} {'type': 'loss', 'content': 0.14185978472232819, 'timestamp': '2025-09-30 22:29:36.756496', 'step': 14234, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:36.795541', 'step': 14234, 'epoch': 2} {'type': 'loss', 'content': 0.03210381418466568, 'timestamp': '2025-09-30 22:29:36.799940', 'step': 14235, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:36.835434', 'step': 14235, 'epoch': 2} {'type': 'loss', 'content': 0.06938721239566803, 'timestamp': '2025-09-30 22:29:36.860841', 'step': 14236, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:36.920491', 'step': 14236, 'epoch': 2} {'type': 'loss', 'content': 0.045410946011543274, 'timestamp': '2025-09-30 22:29:36.940864', 'step': 14237, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:37.012427', 'step': 14237, 'epoch': 2} {'type': 'loss', 'content': 0.11020836234092712, 'timestamp': '2025-09-30 22:29:37.030042', 'step': 14238, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:37.065732', 'step': 14238, 'epoch': 2} {'type': 'loss', 'content': 0.12396130710840225, 'timestamp': '2025-09-30 22:29:37.072173', 'step': 14239, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:37.106158', 'step': 14239, 'epoch': 2} {'type': 'loss', 'content': 0.13206860423088074, 'timestamp': '2025-09-30 22:29:37.132632', 'step': 14240, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:37.194336', 'step': 14240, 'epoch': 2} {'type': 'loss', 'content': 0.03699701279401779, 'timestamp': '2025-09-30 22:29:37.198259', 'step': 14241, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:37.241143', 'step': 14241, 'epoch': 2} {'type': 'loss', 'content': 0.0842178463935852, 'timestamp': '2025-09-30 22:29:37.246665', 'step': 14242, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:37.315405', 'step': 14242, 'epoch': 2} {'type': 'loss', 'content': 0.17748963832855225, 'timestamp': '2025-09-30 22:29:37.321007', 'step': 14243, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:37.354477', 'step': 14243, 'epoch': 2} {'type': 'loss', 'content': 0.09069634228944778, 'timestamp': '2025-09-30 22:29:37.380712', 'step': 14244, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:37.419044', 'step': 14244, 'epoch': 2} {'type': 'loss', 'content': 0.10123848170042038, 'timestamp': '2025-09-30 22:29:37.423397', 'step': 14245, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:37.457842', 'step': 14245, 'epoch': 2} {'type': 'loss', 'content': 0.06621681153774261, 'timestamp': '2025-09-30 22:29:37.474609', 'step': 14246, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:37.525614', 'step': 14246, 'epoch': 2} {'type': 'loss', 'content': 0.19061222672462463, 'timestamp': '2025-09-30 22:29:37.531348', 'step': 14247, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:37.569713', 'step': 14247, 'epoch': 2} {'type': 'loss', 'content': 0.08091060072183609, 'timestamp': '2025-09-30 22:29:37.595833', 'step': 14248, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:37.634048', 'step': 14248, 'epoch': 2} {'type': 'loss', 'content': 0.11469529569149017, 'timestamp': '2025-09-30 22:29:37.639053', 'step': 14249, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:37.693158', 'step': 14249, 'epoch': 2} {'type': 'loss', 'content': 0.1059890165925026, 'timestamp': '2025-09-30 22:29:37.712240', 'step': 14250, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:37.760695', 'step': 14250, 'epoch': 2} {'type': 'loss', 'content': 0.094509057700634, 'timestamp': '2025-09-30 22:29:37.765845', 'step': 14251, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:37.813549', 'step': 14251, 'epoch': 2} {'type': 'loss', 'content': 0.08550262451171875, 'timestamp': '2025-09-30 22:29:37.840648', 'step': 14252, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:37.875682', 'step': 14252, 'epoch': 2} {'type': 'loss', 'content': 0.08559311926364899, 'timestamp': '2025-09-30 22:29:37.881657', 'step': 14253, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:37.944601', 'step': 14253, 'epoch': 2} {'type': 'loss', 'content': 0.08669766038656235, 'timestamp': '2025-09-30 22:29:37.960391', 'step': 14254, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:37.994188', 'step': 14254, 'epoch': 2} {'type': 'loss', 'content': 0.09273122996091843, 'timestamp': '2025-09-30 22:29:37.998282', 'step': 14255, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.034787', 'step': 14255, 'epoch': 2} {'type': 'loss', 'content': 0.10033901780843735, 'timestamp': '2025-09-30 22:29:38.062015', 'step': 14256, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:38.121658', 'step': 14256, 'epoch': 2} {'type': 'loss', 'content': 0.11537753790616989, 'timestamp': '2025-09-30 22:29:38.138723', 'step': 14257, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.177932', 'step': 14257, 'epoch': 2} {'type': 'loss', 'content': 0.14356493949890137, 'timestamp': '2025-09-30 22:29:38.182413', 'step': 14258, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.216026', 'step': 14258, 'epoch': 2} {'type': 'loss', 'content': 0.1055503711104393, 'timestamp': '2025-09-30 22:29:38.220411', 'step': 14259, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:38.253425', 'step': 14259, 'epoch': 2} {'type': 'loss', 'content': 0.114817775785923, 'timestamp': '2025-09-30 22:29:38.294266', 'step': 14260, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.337250', 'step': 14260, 'epoch': 2} {'type': 'loss', 'content': 0.12611271440982819, 'timestamp': '2025-09-30 22:29:38.356180', 'step': 14261, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:38.388631', 'step': 14261, 'epoch': 2} {'type': 'loss', 'content': 0.19130457937717438, 'timestamp': '2025-09-30 22:29:38.393136', 'step': 14262, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.426137', 'step': 14262, 'epoch': 2} {'type': 'loss', 'content': 0.16155405342578888, 'timestamp': '2025-09-30 22:29:38.442316', 'step': 14263, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.476059', 'step': 14263, 'epoch': 2} {'type': 'loss', 'content': 0.08993420004844666, 'timestamp': '2025-09-30 22:29:38.511519', 'step': 14264, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:38.544687', 'step': 14264, 'epoch': 2} {'type': 'loss', 'content': 0.13710874319076538, 'timestamp': '2025-09-30 22:29:38.551576', 'step': 14265, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:38.582848', 'step': 14265, 'epoch': 2} {'type': 'loss', 'content': 0.07986287027597427, 'timestamp': '2025-09-30 22:29:38.588174', 'step': 14266, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.632547', 'step': 14266, 'epoch': 2} {'type': 'loss', 'content': 0.04571545496582985, 'timestamp': '2025-09-30 22:29:38.637670', 'step': 14267, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:38.677025', 'step': 14267, 'epoch': 2} {'type': 'loss', 'content': 0.12399851530790329, 'timestamp': '2025-09-30 22:29:38.702973', 'step': 14268, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:38.734428', 'step': 14268, 'epoch': 2} {'type': 'loss', 'content': 0.08674244582653046, 'timestamp': '2025-09-30 22:29:38.738698', 'step': 14269, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:38.770778', 'step': 14269, 'epoch': 2} {'type': 'loss', 'content': 0.16188418865203857, 'timestamp': '2025-09-30 22:29:38.774863', 'step': 14270, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:38.807535', 'step': 14270, 'epoch': 2} {'type': 'loss', 'content': 0.06953419744968414, 'timestamp': '2025-09-30 22:29:38.812571', 'step': 14271, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:38.859172', 'step': 14271, 'epoch': 2} {'type': 'loss', 'content': 0.13567736744880676, 'timestamp': '2025-09-30 22:29:38.889228', 'step': 14272, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.923492', 'step': 14272, 'epoch': 2} {'type': 'loss', 'content': 0.05160648375749588, 'timestamp': '2025-09-30 22:29:38.927569', 'step': 14273, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:38.961036', 'step': 14273, 'epoch': 2} {'type': 'loss', 'content': 0.06090538576245308, 'timestamp': '2025-09-30 22:29:38.976615', 'step': 14274, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.012940', 'step': 14274, 'epoch': 2} {'type': 'loss', 'content': 0.08619069308042526, 'timestamp': '2025-09-30 22:29:39.017044', 'step': 14275, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.048933', 'step': 14275, 'epoch': 2} {'type': 'loss', 'content': 0.08016201108694077, 'timestamp': '2025-09-30 22:29:39.076224', 'step': 14276, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.107848', 'step': 14276, 'epoch': 2} {'type': 'loss', 'content': 0.07624788582324982, 'timestamp': '2025-09-30 22:29:39.111475', 'step': 14277, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.153567', 'step': 14277, 'epoch': 2} {'type': 'loss', 'content': 0.14659786224365234, 'timestamp': '2025-09-30 22:29:39.159405', 'step': 14278, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:39.190147', 'step': 14278, 'epoch': 2} {'type': 'loss', 'content': 0.12558619678020477, 'timestamp': '2025-09-30 22:29:39.194806', 'step': 14279, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.240436', 'step': 14279, 'epoch': 2} {'type': 'loss', 'content': 0.06134767830371857, 'timestamp': '2025-09-30 22:29:39.276224', 'step': 14280, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.313462', 'step': 14280, 'epoch': 2} {'type': 'loss', 'content': 0.12772372364997864, 'timestamp': '2025-09-30 22:29:39.319147', 'step': 14281, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:39.361166', 'step': 14281, 'epoch': 2} {'type': 'loss', 'content': 0.08290079236030579, 'timestamp': '2025-09-30 22:29:39.365137', 'step': 14282, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:39.398272', 'step': 14282, 'epoch': 2} {'type': 'loss', 'content': 0.07659129798412323, 'timestamp': '2025-09-30 22:29:39.403661', 'step': 14283, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:39.435267', 'step': 14283, 'epoch': 2} {'type': 'loss', 'content': 0.10775645822286606, 'timestamp': '2025-09-30 22:29:39.462178', 'step': 14284, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:39.494417', 'step': 14284, 'epoch': 2} {'type': 'loss', 'content': 0.06913300603628159, 'timestamp': '2025-09-30 22:29:39.498428', 'step': 14285, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:39.530450', 'step': 14285, 'epoch': 2} {'type': 'loss', 'content': 0.15480422973632812, 'timestamp': '2025-09-30 22:29:39.535415', 'step': 14286, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.569325', 'step': 14286, 'epoch': 2} {'type': 'loss', 'content': 0.1302742213010788, 'timestamp': '2025-09-30 22:29:39.574437', 'step': 14287, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:39.610294', 'step': 14287, 'epoch': 2} {'type': 'loss', 'content': 0.06595663726329803, 'timestamp': '2025-09-30 22:29:39.636819', 'step': 14288, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.669561', 'step': 14288, 'epoch': 2} {'type': 'loss', 'content': 0.09210304170846939, 'timestamp': '2025-09-30 22:29:39.688929', 'step': 14289, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:39.727666', 'step': 14289, 'epoch': 2} {'type': 'loss', 'content': 0.0986088290810585, 'timestamp': '2025-09-30 22:29:39.732427', 'step': 14290, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:39.765334', 'step': 14290, 'epoch': 2} {'type': 'loss', 'content': 0.11716765910387039, 'timestamp': '2025-09-30 22:29:39.769222', 'step': 14291, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:39.801067', 'step': 14291, 'epoch': 2} {'type': 'loss', 'content': 0.05630212277173996, 'timestamp': '2025-09-30 22:29:39.826602', 'step': 14292, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:39.871528', 'step': 14292, 'epoch': 2} {'type': 'loss', 'content': 0.13990122079849243, 'timestamp': '2025-09-30 22:29:39.887734', 'step': 14293, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:39.932000', 'step': 14293, 'epoch': 2} {'type': 'loss', 'content': 0.12344443798065186, 'timestamp': '2025-09-30 22:29:39.937579', 'step': 14294, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:39.971055', 'step': 14294, 'epoch': 2} {'type': 'loss', 'content': 0.0647435262799263, 'timestamp': '2025-09-30 22:29:39.976343', 'step': 14295, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:40.010616', 'step': 14295, 'epoch': 2} {'type': 'loss', 'content': 0.09051849693059921, 'timestamp': '2025-09-30 22:29:40.037202', 'step': 14296, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:40.071414', 'step': 14296, 'epoch': 2} {'type': 'loss', 'content': 0.15628835558891296, 'timestamp': '2025-09-30 22:29:40.076592', 'step': 14297, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:40.109444', 'step': 14297, 'epoch': 2} {'type': 'loss', 'content': 0.09180470556020737, 'timestamp': '2025-09-30 22:29:40.114479', 'step': 14298, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:40.160535', 'step': 14298, 'epoch': 2} {'type': 'loss', 'content': 0.10108807682991028, 'timestamp': '2025-09-30 22:29:40.177796', 'step': 14299, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:40.224910', 'step': 14299, 'epoch': 2} {'type': 'loss', 'content': 0.08725807815790176, 'timestamp': '2025-09-30 22:29:40.263961', 'step': 14300, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:40.296318', 'step': 14300, 'epoch': 2} {'type': 'loss', 'content': 0.1031498983502388, 'timestamp': '2025-09-30 22:29:40.301416', 'step': 14301, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.346275', 'step': 14301, 'epoch': 2} {'type': 'loss', 'content': 0.06455032527446747, 'timestamp': '2025-09-30 22:29:40.351961', 'step': 14302, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:40.383676', 'step': 14302, 'epoch': 2} {'type': 'loss', 'content': 0.13214367628097534, 'timestamp': '2025-09-30 22:29:40.388599', 'step': 14303, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.426015', 'step': 14303, 'epoch': 2} {'type': 'loss', 'content': 0.08137091249227524, 'timestamp': '2025-09-30 22:29:40.453329', 'step': 14304, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:40.486571', 'step': 14304, 'epoch': 2} {'type': 'loss', 'content': 0.13177382946014404, 'timestamp': '2025-09-30 22:29:40.503524', 'step': 14305, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:40.548024', 'step': 14305, 'epoch': 2} {'type': 'loss', 'content': 0.07545346021652222, 'timestamp': '2025-09-30 22:29:40.553056', 'step': 14306, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:40.586719', 'step': 14306, 'epoch': 2} {'type': 'loss', 'content': 0.04557711258530617, 'timestamp': '2025-09-30 22:29:40.590711', 'step': 14307, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.633944', 'step': 14307, 'epoch': 2} {'type': 'loss', 'content': 0.10802312195301056, 'timestamp': '2025-09-30 22:29:40.671274', 'step': 14308, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.704654', 'step': 14308, 'epoch': 2} {'type': 'loss', 'content': 0.1120143011212349, 'timestamp': '2025-09-30 22:29:40.710796', 'step': 14309, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:40.755913', 'step': 14309, 'epoch': 2} {'type': 'loss', 'content': 0.09854604303836823, 'timestamp': '2025-09-30 22:29:40.759687', 'step': 14310, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.791607', 'step': 14310, 'epoch': 2} {'type': 'loss', 'content': 0.046660859137773514, 'timestamp': '2025-09-30 22:29:40.796516', 'step': 14311, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.828478', 'step': 14311, 'epoch': 2} {'type': 'loss', 'content': 0.0865418016910553, 'timestamp': '2025-09-30 22:29:40.853359', 'step': 14312, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.894065', 'step': 14312, 'epoch': 2} {'type': 'loss', 'content': 0.0751400738954544, 'timestamp': '2025-09-30 22:29:40.898832', 'step': 14313, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:40.932923', 'step': 14313, 'epoch': 2} {'type': 'loss', 'content': 0.13001079857349396, 'timestamp': '2025-09-30 22:29:40.938714', 'step': 14314, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:40.973164', 'step': 14314, 'epoch': 2} {'type': 'loss', 'content': 0.13599228858947754, 'timestamp': '2025-09-30 22:29:40.989841', 'step': 14315, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.022897', 'step': 14315, 'epoch': 2} {'type': 'loss', 'content': 0.08292876929044724, 'timestamp': '2025-09-30 22:29:41.048517', 'step': 14316, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.085015', 'step': 14316, 'epoch': 2} {'type': 'loss', 'content': 0.04510100558400154, 'timestamp': '2025-09-30 22:29:41.090516', 'step': 14317, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:41.134362', 'step': 14317, 'epoch': 2} {'type': 'loss', 'content': 0.06933023780584335, 'timestamp': '2025-09-30 22:29:41.138181', 'step': 14318, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:41.367351', 'step': 14318, 'epoch': 2} {'type': 'loss', 'content': 0.11496780067682266, 'timestamp': '2025-09-30 22:29:41.371946', 'step': 14319, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:41.407953', 'step': 14319, 'epoch': 2} {'type': 'loss', 'content': 0.1064554825425148, 'timestamp': '2025-09-30 22:29:41.435172', 'step': 14320, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.469489', 'step': 14320, 'epoch': 2} {'type': 'loss', 'content': 0.09338216483592987, 'timestamp': '2025-09-30 22:29:41.479694', 'step': 14321, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.522067', 'step': 14321, 'epoch': 2} {'type': 'loss', 'content': 0.09835126250982285, 'timestamp': '2025-09-30 22:29:41.527277', 'step': 14322, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:41.562173', 'step': 14322, 'epoch': 2} {'type': 'loss', 'content': 0.19717912375926971, 'timestamp': '2025-09-30 22:29:41.567587', 'step': 14323, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:41.599943', 'step': 14323, 'epoch': 2} {'type': 'loss', 'content': 0.18106773495674133, 'timestamp': '2025-09-30 22:29:41.625775', 'step': 14324, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:41.658839', 'step': 14324, 'epoch': 2} {'type': 'loss', 'content': 0.05080049857497215, 'timestamp': '2025-09-30 22:29:41.663916', 'step': 14325, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:41.697059', 'step': 14325, 'epoch': 2} {'type': 'loss', 'content': 0.047864943742752075, 'timestamp': '2025-09-30 22:29:41.708052', 'step': 14326, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:41.751494', 'step': 14326, 'epoch': 2} {'type': 'loss', 'content': 0.11178514361381531, 'timestamp': '2025-09-30 22:29:41.767836', 'step': 14327, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.802151', 'step': 14327, 'epoch': 2} {'type': 'loss', 'content': 0.0901498943567276, 'timestamp': '2025-09-30 22:29:41.840732', 'step': 14328, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.885739', 'step': 14328, 'epoch': 2} {'type': 'loss', 'content': 0.14776989817619324, 'timestamp': '2025-09-30 22:29:41.890345', 'step': 14329, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:41.922491', 'step': 14329, 'epoch': 2} {'type': 'loss', 'content': 0.07147802412509918, 'timestamp': '2025-09-30 22:29:41.933523', 'step': 14330, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:41.965269', 'step': 14330, 'epoch': 2} {'type': 'loss', 'content': 0.10861141979694366, 'timestamp': '2025-09-30 22:29:41.979947', 'step': 14331, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:42.013534', 'step': 14331, 'epoch': 2} {'type': 'loss', 'content': 0.1355426162481308, 'timestamp': '2025-09-30 22:29:42.052750', 'step': 14332, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:42.095786', 'step': 14332, 'epoch': 2} {'type': 'loss', 'content': 0.11622835695743561, 'timestamp': '2025-09-30 22:29:42.102699', 'step': 14333, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:42.139056', 'step': 14333, 'epoch': 2} {'type': 'loss', 'content': 0.12473051995038986, 'timestamp': '2025-09-30 22:29:42.143943', 'step': 14334, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:42.175486', 'step': 14334, 'epoch': 2} {'type': 'loss', 'content': 0.1632884293794632, 'timestamp': '2025-09-30 22:29:42.190040', 'step': 14335, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:42.224738', 'step': 14335, 'epoch': 2} {'type': 'loss', 'content': 0.08868954330682755, 'timestamp': '2025-09-30 22:29:42.260499', 'step': 14336, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:42.304297', 'step': 14336, 'epoch': 2} {'type': 'loss', 'content': 0.1268617808818817, 'timestamp': '2025-09-30 22:29:42.308533', 'step': 14337, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:42.341917', 'step': 14337, 'epoch': 2} {'type': 'loss', 'content': 0.08625680953264236, 'timestamp': '2025-09-30 22:29:42.346397', 'step': 14338, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:42.380051', 'step': 14338, 'epoch': 2} {'type': 'loss', 'content': 0.10234804451465607, 'timestamp': '2025-09-30 22:29:42.384507', 'step': 14339, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:42.416168', 'step': 14339, 'epoch': 2} {'type': 'loss', 'content': 0.0728510320186615, 'timestamp': '2025-09-30 22:29:42.452663', 'step': 14340, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:42.493257', 'step': 14340, 'epoch': 2} {'type': 'loss', 'content': 0.07647406309843063, 'timestamp': '2025-09-30 22:29:42.497540', 'step': 14341, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:42.530417', 'step': 14341, 'epoch': 2} {'type': 'loss', 'content': 0.1923048198223114, 'timestamp': '2025-09-30 22:29:42.534503', 'step': 14342, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:42.579534', 'step': 14342, 'epoch': 2} {'type': 'loss', 'content': 0.054004501551389694, 'timestamp': '2025-09-30 22:29:42.583953', 'step': 14343, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:42.627487', 'step': 14343, 'epoch': 2} {'type': 'loss', 'content': 0.1422303169965744, 'timestamp': '2025-09-30 22:29:42.653874', 'step': 14344, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:42.686167', 'step': 14344, 'epoch': 2} {'type': 'loss', 'content': 0.08546720445156097, 'timestamp': '2025-09-30 22:29:42.700491', 'step': 14345, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:42.746299', 'step': 14345, 'epoch': 2} {'type': 'loss', 'content': 0.10070841014385223, 'timestamp': '2025-09-30 22:29:42.762480', 'step': 14346, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:42.808163', 'step': 14346, 'epoch': 2} {'type': 'loss', 'content': 0.05114813894033432, 'timestamp': '2025-09-30 22:29:42.811462', 'step': 14347, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:42.844902', 'step': 14347, 'epoch': 2} {'type': 'loss', 'content': 0.0575406551361084, 'timestamp': '2025-09-30 22:29:42.870283', 'step': 14348, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:42.903078', 'step': 14348, 'epoch': 2} {'type': 'loss', 'content': 0.0838722437620163, 'timestamp': '2025-09-30 22:29:42.919769', 'step': 14349, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:42.965787', 'step': 14349, 'epoch': 2} {'type': 'loss', 'content': 0.07937884330749512, 'timestamp': '2025-09-30 22:29:42.982193', 'step': 14350, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:43.025806', 'step': 14350, 'epoch': 2} {'type': 'loss', 'content': 0.08791439235210419, 'timestamp': '2025-09-30 22:29:43.044955', 'step': 14351, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:43.076636', 'step': 14351, 'epoch': 2} {'type': 'loss', 'content': 0.09791147708892822, 'timestamp': '2025-09-30 22:29:43.104046', 'step': 14352, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:43.144315', 'step': 14352, 'epoch': 2} {'type': 'loss', 'content': 0.15235085785388947, 'timestamp': '2025-09-30 22:29:43.161381', 'step': 14353, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:43.206938', 'step': 14353, 'epoch': 2} {'type': 'loss', 'content': 0.09074213355779648, 'timestamp': '2025-09-30 22:29:43.224943', 'step': 14354, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:43.258236', 'step': 14354, 'epoch': 2} {'type': 'loss', 'content': 0.09907066822052002, 'timestamp': '2025-09-30 22:29:43.263375', 'step': 14355, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:43.296276', 'step': 14355, 'epoch': 2} {'type': 'loss', 'content': 0.08678529411554337, 'timestamp': '2025-09-30 22:29:43.324241', 'step': 14356, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:43.357454', 'step': 14356, 'epoch': 2} {'type': 'loss', 'content': 0.09129556268453598, 'timestamp': '2025-09-30 22:29:43.363606', 'step': 14357, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:43.398071', 'step': 14357, 'epoch': 2} {'type': 'loss', 'content': 0.09534061700105667, 'timestamp': '2025-09-30 22:29:43.413091', 'step': 14358, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:43.460326', 'step': 14358, 'epoch': 2} {'type': 'loss', 'content': 0.11630171537399292, 'timestamp': '2025-09-30 22:29:43.464989', 'step': 14359, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:43.497434', 'step': 14359, 'epoch': 2} {'type': 'loss', 'content': 0.10903441160917282, 'timestamp': '2025-09-30 22:29:43.522461', 'step': 14360, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:43.555286', 'step': 14360, 'epoch': 2} {'type': 'loss', 'content': 0.10042721778154373, 'timestamp': '2025-09-30 22:29:43.571146', 'step': 14361, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:43.615401', 'step': 14361, 'epoch': 2} {'type': 'loss', 'content': 0.18864955008029938, 'timestamp': '2025-09-30 22:29:43.633696', 'step': 14362, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:43.672778', 'step': 14362, 'epoch': 2} {'type': 'loss', 'content': 0.14999110996723175, 'timestamp': '2025-09-30 22:29:43.690464', 'step': 14363, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:43.734204', 'step': 14363, 'epoch': 2} {'type': 'loss', 'content': 0.057525571435689926, 'timestamp': '2025-09-30 22:29:43.761469', 'step': 14364, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:43.795524', 'step': 14364, 'epoch': 2} {'type': 'loss', 'content': 0.13193920254707336, 'timestamp': '2025-09-30 22:29:43.799837', 'step': 14365, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:43.836337', 'step': 14365, 'epoch': 2} {'type': 'loss', 'content': 0.15275266766548157, 'timestamp': '2025-09-30 22:29:43.840377', 'step': 14366, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:43.883263', 'step': 14366, 'epoch': 2} {'type': 'loss', 'content': 0.08668094873428345, 'timestamp': '2025-09-30 22:29:43.896825', 'step': 14367, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:43.941751', 'step': 14367, 'epoch': 2} {'type': 'loss', 'content': 0.06466231495141983, 'timestamp': '2025-09-30 22:29:43.971019', 'step': 14368, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.022384', 'step': 14368, 'epoch': 2} {'type': 'loss', 'content': 0.11262557655572891, 'timestamp': '2025-09-30 22:29:44.026248', 'step': 14369, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:44.058343', 'step': 14369, 'epoch': 2} {'type': 'loss', 'content': 0.16219773888587952, 'timestamp': '2025-09-30 22:29:44.061422', 'step': 14370, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.105643', 'step': 14370, 'epoch': 2} {'type': 'loss', 'content': 0.08334226906299591, 'timestamp': '2025-09-30 22:29:44.112123', 'step': 14371, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.150925', 'step': 14371, 'epoch': 2} {'type': 'loss', 'content': 0.09206467121839523, 'timestamp': '2025-09-30 22:29:44.184357', 'step': 14372, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.219115', 'step': 14372, 'epoch': 2} {'type': 'loss', 'content': 0.06758678704500198, 'timestamp': '2025-09-30 22:29:44.232153', 'step': 14373, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:44.272534', 'step': 14373, 'epoch': 2} {'type': 'loss', 'content': 0.09040903300046921, 'timestamp': '2025-09-30 22:29:44.275902', 'step': 14374, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:44.307114', 'step': 14374, 'epoch': 2} {'type': 'loss', 'content': 0.14564429223537445, 'timestamp': '2025-09-30 22:29:44.311334', 'step': 14375, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.354713', 'step': 14375, 'epoch': 2} {'type': 'loss', 'content': 0.07752490043640137, 'timestamp': '2025-09-30 22:29:44.381182', 'step': 14376, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:44.423135', 'step': 14376, 'epoch': 2} {'type': 'loss', 'content': 0.06506114453077316, 'timestamp': '2025-09-30 22:29:44.441140', 'step': 14377, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:44.483341', 'step': 14377, 'epoch': 2} {'type': 'loss', 'content': 0.04531712457537651, 'timestamp': '2025-09-30 22:29:44.488480', 'step': 14378, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:44.532735', 'step': 14378, 'epoch': 2} {'type': 'loss', 'content': 0.10439284145832062, 'timestamp': '2025-09-30 22:29:44.536341', 'step': 14379, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.570495', 'step': 14379, 'epoch': 2} {'type': 'loss', 'content': 0.05720043554902077, 'timestamp': '2025-09-30 22:29:44.597581', 'step': 14380, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:44.640191', 'step': 14380, 'epoch': 2} {'type': 'loss', 'content': 0.09065958112478256, 'timestamp': '2025-09-30 22:29:44.642708', 'step': 14381, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:44.675030', 'step': 14381, 'epoch': 2} {'type': 'loss', 'content': 0.10687439888715744, 'timestamp': '2025-09-30 22:29:44.690985', 'step': 14382, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:44.734576', 'step': 14382, 'epoch': 2} {'type': 'loss', 'content': 0.11737208068370819, 'timestamp': '2025-09-30 22:29:44.737951', 'step': 14383, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:44.769389', 'step': 14383, 'epoch': 2} {'type': 'loss', 'content': 0.08607936650514603, 'timestamp': '2025-09-30 22:29:44.794874', 'step': 14384, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:44.826658', 'step': 14384, 'epoch': 2} {'type': 'loss', 'content': 0.10624600946903229, 'timestamp': '2025-09-30 22:29:44.840953', 'step': 14385, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:44.888400', 'step': 14385, 'epoch': 2} {'type': 'loss', 'content': 0.11960729956626892, 'timestamp': '2025-09-30 22:29:44.893120', 'step': 14386, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:44.927412', 'step': 14386, 'epoch': 2} {'type': 'loss', 'content': 0.09077484160661697, 'timestamp': '2025-09-30 22:29:44.934116', 'step': 14387, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:44.966027', 'step': 14387, 'epoch': 2} {'type': 'loss', 'content': 0.14888864755630493, 'timestamp': '2025-09-30 22:29:44.990126', 'step': 14388, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:45.023341', 'step': 14388, 'epoch': 2} {'type': 'loss', 'content': 0.11338423192501068, 'timestamp': '2025-09-30 22:29:45.028550', 'step': 14389, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:45.061834', 'step': 14389, 'epoch': 2} {'type': 'loss', 'content': 0.12256108969449997, 'timestamp': '2025-09-30 22:29:45.067577', 'step': 14390, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:45.101327', 'step': 14390, 'epoch': 2} {'type': 'loss', 'content': 0.06399887055158615, 'timestamp': '2025-09-30 22:29:45.104353', 'step': 14391, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:45.140333', 'step': 14391, 'epoch': 2} {'type': 'loss', 'content': 0.051679931581020355, 'timestamp': '2025-09-30 22:29:45.165059', 'step': 14392, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:45.203868', 'step': 14392, 'epoch': 2} {'type': 'loss', 'content': 0.058583199977874756, 'timestamp': '2025-09-30 22:29:45.206997', 'step': 14393, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:45.239086', 'step': 14393, 'epoch': 2} {'type': 'loss', 'content': 0.10020437836647034, 'timestamp': '2025-09-30 22:29:45.242833', 'step': 14394, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:45.276770', 'step': 14394, 'epoch': 2} {'type': 'loss', 'content': 0.07294397056102753, 'timestamp': '2025-09-30 22:29:45.281244', 'step': 14395, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:45.321111', 'step': 14395, 'epoch': 2} {'type': 'loss', 'content': 0.10945563018321991, 'timestamp': '2025-09-30 22:29:45.347488', 'step': 14396, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.378959', 'step': 14396, 'epoch': 2} {'type': 'loss', 'content': 0.06804497539997101, 'timestamp': '2025-09-30 22:29:45.382544', 'step': 14397, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:45.414456', 'step': 14397, 'epoch': 2} {'type': 'loss', 'content': 0.0748772919178009, 'timestamp': '2025-09-30 22:29:45.417393', 'step': 14398, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:29:45.464321', 'step': 14398, 'epoch': 2} {'type': 'loss', 'content': 0.07082125544548035, 'timestamp': '2025-09-30 22:29:45.468962', 'step': 14399, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.500376', 'step': 14399, 'epoch': 2} {'type': 'loss', 'content': 0.11660942435264587, 'timestamp': '2025-09-30 22:29:45.542908', 'step': 14400, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.583412', 'step': 14400, 'epoch': 2} {'type': 'loss', 'content': 0.12986619770526886, 'timestamp': '2025-09-30 22:29:45.587915', 'step': 14401, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.627075', 'step': 14401, 'epoch': 2} {'type': 'loss', 'content': 0.12644615769386292, 'timestamp': '2025-09-30 22:29:45.641482', 'step': 14402, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.673770', 'step': 14402, 'epoch': 2} {'type': 'loss', 'content': 0.11335215717554092, 'timestamp': '2025-09-30 22:29:45.676609', 'step': 14403, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:45.708770', 'step': 14403, 'epoch': 2} {'type': 'loss', 'content': 0.08802478015422821, 'timestamp': '2025-09-30 22:29:45.734739', 'step': 14404, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.766806', 'step': 14404, 'epoch': 2} {'type': 'loss', 'content': 0.05660821869969368, 'timestamp': '2025-09-30 22:29:45.771698', 'step': 14405, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.813297', 'step': 14405, 'epoch': 2} {'type': 'loss', 'content': 0.12301180511713028, 'timestamp': '2025-09-30 22:29:45.826454', 'step': 14406, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.870085', 'step': 14406, 'epoch': 2} {'type': 'loss', 'content': 0.10081835091114044, 'timestamp': '2025-09-30 22:29:45.874324', 'step': 14407, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:45.907247', 'step': 14407, 'epoch': 2} {'type': 'loss', 'content': 0.026009496301412582, 'timestamp': '2025-09-30 22:29:45.934212', 'step': 14408, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:45.964093', 'step': 14408, 'epoch': 2} {'type': 'loss', 'content': 0.09393514692783356, 'timestamp': '2025-09-30 22:29:45.973132', 'step': 14409, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:29:46.011526', 'step': 14409, 'epoch': 2} {'type': 'loss', 'content': 0.19871972501277924, 'timestamp': '2025-09-30 22:29:46.018630', 'step': 14410, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:46.058999', 'step': 14410, 'epoch': 2} {'type': 'loss', 'content': 0.09041702002286911, 'timestamp': '2025-09-30 22:29:46.061996', 'step': 14411, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.101001', 'step': 14411, 'epoch': 2} {'type': 'loss', 'content': 0.09102053195238113, 'timestamp': '2025-09-30 22:29:46.137237', 'step': 14412, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:46.170660', 'step': 14412, 'epoch': 2} {'type': 'loss', 'content': 0.13301685452461243, 'timestamp': '2025-09-30 22:29:46.174546', 'step': 14413, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.205622', 'step': 14413, 'epoch': 2} {'type': 'loss', 'content': 0.06463924795389175, 'timestamp': '2025-09-30 22:29:46.219890', 'step': 14414, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:46.252746', 'step': 14414, 'epoch': 2} {'type': 'loss', 'content': 0.2028590738773346, 'timestamp': '2025-09-30 22:29:46.256638', 'step': 14415, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.289339', 'step': 14415, 'epoch': 2} {'type': 'loss', 'content': 0.09951765835285187, 'timestamp': '2025-09-30 22:29:46.317160', 'step': 14416, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:46.365025', 'step': 14416, 'epoch': 2} {'type': 'loss', 'content': 0.2360500991344452, 'timestamp': '2025-09-30 22:29:46.370710', 'step': 14417, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.404114', 'step': 14417, 'epoch': 2} {'type': 'loss', 'content': 0.04810096323490143, 'timestamp': '2025-09-30 22:29:46.417802', 'step': 14418, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:46.451102', 'step': 14418, 'epoch': 2} {'type': 'loss', 'content': 0.15653406083583832, 'timestamp': '2025-09-30 22:29:46.455708', 'step': 14419, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:46.486813', 'step': 14419, 'epoch': 2} {'type': 'loss', 'content': 0.10086861252784729, 'timestamp': '2025-09-30 22:29:46.521913', 'step': 14420, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.553767', 'step': 14420, 'epoch': 2} {'type': 'loss', 'content': 0.13376609981060028, 'timestamp': '2025-09-30 22:29:46.557246', 'step': 14421, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:46.598713', 'step': 14421, 'epoch': 2} {'type': 'loss', 'content': 0.12440991401672363, 'timestamp': '2025-09-30 22:29:46.612032', 'step': 14422, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.644350', 'step': 14422, 'epoch': 2} {'type': 'loss', 'content': 0.08711770921945572, 'timestamp': '2025-09-30 22:29:46.650144', 'step': 14423, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.682986', 'step': 14423, 'epoch': 2} {'type': 'loss', 'content': 0.053579181432724, 'timestamp': '2025-09-30 22:29:46.710487', 'step': 14424, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:46.741951', 'step': 14424, 'epoch': 2} {'type': 'loss', 'content': 0.11551455408334732, 'timestamp': '2025-09-30 22:29:46.745035', 'step': 14425, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.776064', 'step': 14425, 'epoch': 2} {'type': 'loss', 'content': 0.038182251155376434, 'timestamp': '2025-09-30 22:29:46.781081', 'step': 14426, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:46.814893', 'step': 14426, 'epoch': 2} {'type': 'loss', 'content': 0.09149932861328125, 'timestamp': '2025-09-30 22:29:46.828357', 'step': 14427, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:46.859692', 'step': 14427, 'epoch': 2} {'type': 'loss', 'content': 0.11278070509433746, 'timestamp': '2025-09-30 22:29:46.884836', 'step': 14428, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:46.916742', 'step': 14428, 'epoch': 2} {'type': 'loss', 'content': 0.023452892899513245, 'timestamp': '2025-09-30 22:29:46.921536', 'step': 14429, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:46.954743', 'step': 14429, 'epoch': 2} {'type': 'loss', 'content': 0.08733195066452026, 'timestamp': '2025-09-30 22:29:46.968943', 'step': 14430, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:47.007681', 'step': 14430, 'epoch': 2} {'type': 'loss', 'content': 0.1142418161034584, 'timestamp': '2025-09-30 22:29:47.010449', 'step': 14431, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:47.042665', 'step': 14431, 'epoch': 2} {'type': 'loss', 'content': 0.1016562208533287, 'timestamp': '2025-09-30 22:29:47.067915', 'step': 14432, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:47.101334', 'step': 14432, 'epoch': 2} {'type': 'loss', 'content': 0.21209914982318878, 'timestamp': '2025-09-30 22:29:47.106533', 'step': 14433, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:47.138423', 'step': 14433, 'epoch': 2} {'type': 'loss', 'content': 0.13519562780857086, 'timestamp': '2025-09-30 22:29:47.145622', 'step': 14434, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:47.178404', 'step': 14434, 'epoch': 2} {'type': 'loss', 'content': 0.14393176138401031, 'timestamp': '2025-09-30 22:29:47.181653', 'step': 14435, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:47.213899', 'step': 14435, 'epoch': 2} {'type': 'loss', 'content': 0.09528033435344696, 'timestamp': '2025-09-30 22:29:47.240426', 'step': 14436, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:47.271345', 'step': 14436, 'epoch': 2} {'type': 'loss', 'content': 0.13075664639472961, 'timestamp': '2025-09-30 22:29:47.276604', 'step': 14437, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:47.309475', 'step': 14437, 'epoch': 2} {'type': 'loss', 'content': 0.21082021296024323, 'timestamp': '2025-09-30 22:29:47.315531', 'step': 14438, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:47.349715', 'step': 14438, 'epoch': 2} {'type': 'loss', 'content': 0.07927832007408142, 'timestamp': '2025-09-30 22:29:47.364855', 'step': 14439, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:47.397328', 'step': 14439, 'epoch': 2} {'type': 'loss', 'content': 0.10511097311973572, 'timestamp': '2025-09-30 22:29:47.424619', 'step': 14440, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:47.463114', 'step': 14440, 'epoch': 2} {'type': 'loss', 'content': 0.09782302379608154, 'timestamp': '2025-09-30 22:29:47.467477', 'step': 14441, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:47.499984', 'step': 14441, 'epoch': 2} {'type': 'loss', 'content': 0.0480535514652729, 'timestamp': '2025-09-30 22:29:47.504665', 'step': 14442, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:47.536529', 'step': 14442, 'epoch': 2} {'type': 'loss', 'content': 0.1834438443183899, 'timestamp': '2025-09-30 22:29:47.541162', 'step': 14443, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:47.575184', 'step': 14443, 'epoch': 2} {'type': 'loss', 'content': 0.05626200512051582, 'timestamp': '2025-09-30 22:29:47.600802', 'step': 14444, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:47.638481', 'step': 14444, 'epoch': 2} {'type': 'loss', 'content': 0.048306904733181, 'timestamp': '2025-09-30 22:29:47.643257', 'step': 14445, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:47.677850', 'step': 14445, 'epoch': 2} {'type': 'loss', 'content': 0.14762824773788452, 'timestamp': '2025-09-30 22:29:47.683595', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:29:55.809343', 'step': 14446, 'epoch': 2} {'type': 'pplx', 'content': 8685.579263913747, 'timestamp': '2025-09-30 22:29:55.814031', 'step': 14446, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:55.845647', 'step': 14446, 'epoch': 2} {'type': 'loss', 'content': 0.13887181878089905, 'timestamp': '2025-09-30 22:29:55.849387', 'step': 14447, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:55.892535', 'step': 14447, 'epoch': 2} {'type': 'loss', 'content': 0.08777259290218353, 'timestamp': '2025-09-30 22:29:55.918546', 'step': 14448, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:55.950361', 'step': 14448, 'epoch': 2} {'type': 'loss', 'content': 0.05058981105685234, 'timestamp': '2025-09-30 22:29:55.959297', 'step': 14449, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:55.992295', 'step': 14449, 'epoch': 2} {'type': 'loss', 'content': 0.12661829590797424, 'timestamp': '2025-09-30 22:29:55.998131', 'step': 14450, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.030948', 'step': 14450, 'epoch': 2} {'type': 'loss', 'content': 0.11820509284734726, 'timestamp': '2025-09-30 22:29:56.046858', 'step': 14451, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.092239', 'step': 14451, 'epoch': 2} {'type': 'loss', 'content': 0.11421045660972595, 'timestamp': '2025-09-30 22:29:56.119270', 'step': 14452, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:56.161760', 'step': 14452, 'epoch': 2} {'type': 'loss', 'content': 0.12446315586566925, 'timestamp': '2025-09-30 22:29:56.167098', 'step': 14453, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:56.223043', 'step': 14453, 'epoch': 2} {'type': 'loss', 'content': 0.0588068850338459, 'timestamp': '2025-09-30 22:29:56.228639', 'step': 14454, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:56.267312', 'step': 14454, 'epoch': 2} {'type': 'loss', 'content': 0.17685960233211517, 'timestamp': '2025-09-30 22:29:56.272021', 'step': 14455, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.305111', 'step': 14455, 'epoch': 2} {'type': 'loss', 'content': 0.06556811183691025, 'timestamp': '2025-09-30 22:29:56.331360', 'step': 14456, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:56.365101', 'step': 14456, 'epoch': 2} {'type': 'loss', 'content': 0.07836952060461044, 'timestamp': '2025-09-30 22:29:56.369040', 'step': 14457, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:56.401692', 'step': 14457, 'epoch': 2} {'type': 'loss', 'content': 0.15138693153858185, 'timestamp': '2025-09-30 22:29:56.406500', 'step': 14458, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:56.440348', 'step': 14458, 'epoch': 2} {'type': 'loss', 'content': 0.10543406009674072, 'timestamp': '2025-09-30 22:29:56.458467', 'step': 14459, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.492692', 'step': 14459, 'epoch': 2} {'type': 'loss', 'content': 0.22781452536582947, 'timestamp': '2025-09-30 22:29:56.518975', 'step': 14460, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.552305', 'step': 14460, 'epoch': 2} {'type': 'loss', 'content': 0.03005191870033741, 'timestamp': '2025-09-30 22:29:56.555572', 'step': 14461, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:56.599924', 'step': 14461, 'epoch': 2} {'type': 'loss', 'content': 0.09751430153846741, 'timestamp': '2025-09-30 22:29:56.602930', 'step': 14462, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:56.635103', 'step': 14462, 'epoch': 2} {'type': 'loss', 'content': 0.14432591199874878, 'timestamp': '2025-09-30 22:29:56.639706', 'step': 14463, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:56.671633', 'step': 14463, 'epoch': 2} {'type': 'loss', 'content': 0.10966543853282928, 'timestamp': '2025-09-30 22:29:56.697577', 'step': 14464, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.730678', 'step': 14464, 'epoch': 2} {'type': 'loss', 'content': 0.0967104583978653, 'timestamp': '2025-09-30 22:29:56.735554', 'step': 14465, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.768458', 'step': 14465, 'epoch': 2} {'type': 'loss', 'content': 0.09776270389556885, 'timestamp': '2025-09-30 22:29:56.773629', 'step': 14466, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:56.805935', 'step': 14466, 'epoch': 2} {'type': 'loss', 'content': 0.09804791212081909, 'timestamp': '2025-09-30 22:29:56.824780', 'step': 14467, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:56.860399', 'step': 14467, 'epoch': 2} {'type': 'loss', 'content': 0.07311736047267914, 'timestamp': '2025-09-30 22:29:56.886460', 'step': 14468, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:56.920460', 'step': 14468, 'epoch': 2} {'type': 'loss', 'content': 0.13270308077335358, 'timestamp': '2025-09-30 22:29:56.936074', 'step': 14469, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:56.985276', 'step': 14469, 'epoch': 2} {'type': 'loss', 'content': 0.06036429479718208, 'timestamp': '2025-09-30 22:29:56.996636', 'step': 14470, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:57.036390', 'step': 14470, 'epoch': 2} {'type': 'loss', 'content': 0.08734084665775299, 'timestamp': '2025-09-30 22:29:57.040750', 'step': 14471, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.081455', 'step': 14471, 'epoch': 2} {'type': 'loss', 'content': 0.1277807652950287, 'timestamp': '2025-09-30 22:29:57.116170', 'step': 14472, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:57.152798', 'step': 14472, 'epoch': 2} {'type': 'loss', 'content': 0.12707526981830597, 'timestamp': '2025-09-30 22:29:57.157044', 'step': 14473, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:57.189128', 'step': 14473, 'epoch': 2} {'type': 'loss', 'content': 0.16707555949687958, 'timestamp': '2025-09-30 22:29:57.192612', 'step': 14474, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:57.225884', 'step': 14474, 'epoch': 2} {'type': 'loss', 'content': 0.08854587376117706, 'timestamp': '2025-09-30 22:29:57.230291', 'step': 14475, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:57.278167', 'step': 14475, 'epoch': 2} {'type': 'loss', 'content': 0.1287454217672348, 'timestamp': '2025-09-30 22:29:57.306947', 'step': 14476, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.342325', 'step': 14476, 'epoch': 2} {'type': 'loss', 'content': 0.14756931364536285, 'timestamp': '2025-09-30 22:29:57.347790', 'step': 14477, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.387829', 'step': 14477, 'epoch': 2} {'type': 'loss', 'content': 0.14729636907577515, 'timestamp': '2025-09-30 22:29:57.403356', 'step': 14478, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.450738', 'step': 14478, 'epoch': 2} {'type': 'loss', 'content': 0.12380650639533997, 'timestamp': '2025-09-30 22:29:57.467717', 'step': 14479, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:57.501071', 'step': 14479, 'epoch': 2} {'type': 'loss', 'content': 0.047850459814071655, 'timestamp': '2025-09-30 22:29:57.528983', 'step': 14480, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:57.560512', 'step': 14480, 'epoch': 2} {'type': 'loss', 'content': 0.12015193700790405, 'timestamp': '2025-09-30 22:29:57.565690', 'step': 14481, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:29:57.598559', 'step': 14481, 'epoch': 2} {'type': 'loss', 'content': 0.10001233965158463, 'timestamp': '2025-09-30 22:29:57.605744', 'step': 14482, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:57.639925', 'step': 14482, 'epoch': 2} {'type': 'loss', 'content': 0.08287246525287628, 'timestamp': '2025-09-30 22:29:57.645855', 'step': 14483, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.684189', 'step': 14483, 'epoch': 2} {'type': 'loss', 'content': 0.06037802994251251, 'timestamp': '2025-09-30 22:29:57.712948', 'step': 14484, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.747396', 'step': 14484, 'epoch': 2} {'type': 'loss', 'content': 0.1294204294681549, 'timestamp': '2025-09-30 22:29:57.751593', 'step': 14485, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:57.783604', 'step': 14485, 'epoch': 2} {'type': 'loss', 'content': 0.18355214595794678, 'timestamp': '2025-09-30 22:29:57.790301', 'step': 14486, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.826330', 'step': 14486, 'epoch': 2} {'type': 'loss', 'content': 0.12597119808197021, 'timestamp': '2025-09-30 22:29:57.832151', 'step': 14487, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:57.870439', 'step': 14487, 'epoch': 2} {'type': 'loss', 'content': 0.080131396651268, 'timestamp': '2025-09-30 22:29:57.895715', 'step': 14488, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:29:57.944110', 'step': 14488, 'epoch': 2} {'type': 'loss', 'content': 0.06404899060726166, 'timestamp': '2025-09-30 22:29:57.961537', 'step': 14489, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:58.015785', 'step': 14489, 'epoch': 2} {'type': 'loss', 'content': 0.11921164393424988, 'timestamp': '2025-09-30 22:29:58.020513', 'step': 14490, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:58.064195', 'step': 14490, 'epoch': 2} {'type': 'loss', 'content': 0.03113340027630329, 'timestamp': '2025-09-30 22:29:58.068957', 'step': 14491, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:58.100687', 'step': 14491, 'epoch': 2} {'type': 'loss', 'content': 0.08597902208566666, 'timestamp': '2025-09-30 22:29:58.128302', 'step': 14492, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:58.176284', 'step': 14492, 'epoch': 2} {'type': 'loss', 'content': 0.08861928433179855, 'timestamp': '2025-09-30 22:29:58.195703', 'step': 14493, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:58.228934', 'step': 14493, 'epoch': 2} {'type': 'loss', 'content': 0.15604054927825928, 'timestamp': '2025-09-30 22:29:58.244379', 'step': 14494, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:58.288013', 'step': 14494, 'epoch': 2} {'type': 'loss', 'content': 0.062445394694805145, 'timestamp': '2025-09-30 22:29:58.291826', 'step': 14495, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:29:58.325272', 'step': 14495, 'epoch': 2} {'type': 'loss', 'content': 0.08189193159341812, 'timestamp': '2025-09-30 22:29:58.352127', 'step': 14496, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:29:58.393127', 'step': 14496, 'epoch': 2} {'type': 'loss', 'content': 0.10151684284210205, 'timestamp': '2025-09-30 22:29:58.398347', 'step': 14497, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:58.442460', 'step': 14497, 'epoch': 2} {'type': 'loss', 'content': 0.06762367486953735, 'timestamp': '2025-09-30 22:29:58.448340', 'step': 14498, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:29:58.481202', 'step': 14498, 'epoch': 2} {'type': 'loss', 'content': 0.11989466100931168, 'timestamp': '2025-09-30 22:29:58.487348', 'step': 14499, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:29:58.520695', 'step': 14499, 'epoch': 2} {'type': 'loss', 'content': 0.14465221762657166, 'timestamp': '2025-09-30 22:29:58.548534', 'step': 14500, 'epoch': 2} {'type': 'info', 'content': 'Checkpoint saved at step 14500', 'timestamp': '2025-09-30 22:30:04.199592', 'step': 14500, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.233257', 'step': 14500, 'epoch': 2} {'type': 'loss', 'content': 0.06325764954090118, 'timestamp': '2025-09-30 22:30:04.238020', 'step': 14501, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.288203', 'step': 14501, 'epoch': 2} {'type': 'loss', 'content': 0.15311381220817566, 'timestamp': '2025-09-30 22:30:04.292780', 'step': 14502, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.338139', 'step': 14502, 'epoch': 2} {'type': 'loss', 'content': 0.07446647435426712, 'timestamp': '2025-09-30 22:30:04.355489', 'step': 14503, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:04.400546', 'step': 14503, 'epoch': 2} {'type': 'loss', 'content': 0.1091533824801445, 'timestamp': '2025-09-30 22:30:04.432994', 'step': 14504, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:04.475833', 'step': 14504, 'epoch': 2} {'type': 'loss', 'content': 0.09120991826057434, 'timestamp': '2025-09-30 22:30:04.484709', 'step': 14505, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.527408', 'step': 14505, 'epoch': 2} {'type': 'loss', 'content': 0.06612545996904373, 'timestamp': '2025-09-30 22:30:04.531403', 'step': 14506, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.563471', 'step': 14506, 'epoch': 2} {'type': 'loss', 'content': 0.12162835150957108, 'timestamp': '2025-09-30 22:30:04.569160', 'step': 14507, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.603373', 'step': 14507, 'epoch': 2} {'type': 'loss', 'content': 0.11586160957813263, 'timestamp': '2025-09-30 22:30:04.640230', 'step': 14508, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.673777', 'step': 14508, 'epoch': 2} {'type': 'loss', 'content': 0.07906141877174377, 'timestamp': '2025-09-30 22:30:04.679283', 'step': 14509, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.713762', 'step': 14509, 'epoch': 2} {'type': 'loss', 'content': 0.07638458907604218, 'timestamp': '2025-09-30 22:30:04.719512', 'step': 14510, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.754402', 'step': 14510, 'epoch': 2} {'type': 'loss', 'content': 0.1296602189540863, 'timestamp': '2025-09-30 22:30:04.774008', 'step': 14511, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:04.807636', 'step': 14511, 'epoch': 2} {'type': 'loss', 'content': 0.17177365720272064, 'timestamp': '2025-09-30 22:30:04.834745', 'step': 14512, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:04.868668', 'step': 14512, 'epoch': 2} {'type': 'loss', 'content': 0.13623400032520294, 'timestamp': '2025-09-30 22:30:04.873351', 'step': 14513, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:04.906525', 'step': 14513, 'epoch': 2} {'type': 'loss', 'content': 0.10022255033254623, 'timestamp': '2025-09-30 22:30:04.921990', 'step': 14514, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:04.968047', 'step': 14514, 'epoch': 2} {'type': 'loss', 'content': 0.12384874373674393, 'timestamp': '2025-09-30 22:30:04.984543', 'step': 14515, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.016390', 'step': 14515, 'epoch': 2} {'type': 'loss', 'content': 0.07294368743896484, 'timestamp': '2025-09-30 22:30:05.043897', 'step': 14516, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:05.078141', 'step': 14516, 'epoch': 2} {'type': 'loss', 'content': 0.16362427175045013, 'timestamp': '2025-09-30 22:30:05.083955', 'step': 14517, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:05.114922', 'step': 14517, 'epoch': 2} {'type': 'loss', 'content': 0.15350030362606049, 'timestamp': '2025-09-30 22:30:05.120007', 'step': 14518, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:05.153502', 'step': 14518, 'epoch': 2} {'type': 'loss', 'content': 0.18131566047668457, 'timestamp': '2025-09-30 22:30:05.159340', 'step': 14519, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:05.193632', 'step': 14519, 'epoch': 2} {'type': 'loss', 'content': 0.258003830909729, 'timestamp': '2025-09-30 22:30:05.232032', 'step': 14520, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:05.279984', 'step': 14520, 'epoch': 2} {'type': 'loss', 'content': 0.0915651023387909, 'timestamp': '2025-09-30 22:30:05.287055', 'step': 14521, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:05.321364', 'step': 14521, 'epoch': 2} {'type': 'loss', 'content': 0.18287135660648346, 'timestamp': '2025-09-30 22:30:05.338639', 'step': 14522, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.383463', 'step': 14522, 'epoch': 2} {'type': 'loss', 'content': 0.1448102593421936, 'timestamp': '2025-09-30 22:30:05.389538', 'step': 14523, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:05.425032', 'step': 14523, 'epoch': 2} {'type': 'loss', 'content': 0.14467847347259521, 'timestamp': '2025-09-30 22:30:05.451340', 'step': 14524, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.485261', 'step': 14524, 'epoch': 2} {'type': 'loss', 'content': 0.09640664607286453, 'timestamp': '2025-09-30 22:30:05.497060', 'step': 14525, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.542559', 'step': 14525, 'epoch': 2} {'type': 'loss', 'content': 0.16413265466690063, 'timestamp': '2025-09-30 22:30:05.547085', 'step': 14526, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:05.581281', 'step': 14526, 'epoch': 2} {'type': 'loss', 'content': 0.1535840779542923, 'timestamp': '2025-09-30 22:30:05.586188', 'step': 14527, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.618606', 'step': 14527, 'epoch': 2} {'type': 'loss', 'content': 0.03636319562792778, 'timestamp': '2025-09-30 22:30:05.645763', 'step': 14528, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.690576', 'step': 14528, 'epoch': 2} {'type': 'loss', 'content': 0.09552536904811859, 'timestamp': '2025-09-30 22:30:05.703114', 'step': 14529, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:05.734267', 'step': 14529, 'epoch': 2} {'type': 'loss', 'content': 0.07597020268440247, 'timestamp': '2025-09-30 22:30:05.740383', 'step': 14530, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:05.772744', 'step': 14530, 'epoch': 2} {'type': 'loss', 'content': 0.09799143671989441, 'timestamp': '2025-09-30 22:30:05.778017', 'step': 14531, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:05.811512', 'step': 14531, 'epoch': 2} {'type': 'loss', 'content': 0.1067635789513588, 'timestamp': '2025-09-30 22:30:05.836702', 'step': 14532, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:05.877327', 'step': 14532, 'epoch': 2} {'type': 'loss', 'content': 0.08726674318313599, 'timestamp': '2025-09-30 22:30:05.891321', 'step': 14533, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:05.924389', 'step': 14533, 'epoch': 2} {'type': 'loss', 'content': 0.10381637513637543, 'timestamp': '2025-09-30 22:30:05.942232', 'step': 14534, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:05.988320', 'step': 14534, 'epoch': 2} {'type': 'loss', 'content': 0.08931988477706909, 'timestamp': '2025-09-30 22:30:05.993791', 'step': 14535, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.038079', 'step': 14535, 'epoch': 2} {'type': 'loss', 'content': 0.0898328348994255, 'timestamp': '2025-09-30 22:30:06.075160', 'step': 14536, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:06.107564', 'step': 14536, 'epoch': 2} {'type': 'loss', 'content': 0.07027044892311096, 'timestamp': '2025-09-30 22:30:06.112595', 'step': 14537, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:06.147961', 'step': 14537, 'epoch': 2} {'type': 'loss', 'content': 0.11457525193691254, 'timestamp': '2025-09-30 22:30:06.153678', 'step': 14538, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.198804', 'step': 14538, 'epoch': 2} {'type': 'loss', 'content': 0.07986858487129211, 'timestamp': '2025-09-30 22:30:06.217093', 'step': 14539, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:06.250330', 'step': 14539, 'epoch': 2} {'type': 'loss', 'content': 0.08400797843933105, 'timestamp': '2025-09-30 22:30:06.286047', 'step': 14540, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.322984', 'step': 14540, 'epoch': 2} {'type': 'loss', 'content': 0.13747303187847137, 'timestamp': '2025-09-30 22:30:06.329223', 'step': 14541, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.371327', 'step': 14541, 'epoch': 2} {'type': 'loss', 'content': 0.10375863313674927, 'timestamp': '2025-09-30 22:30:06.376824', 'step': 14542, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:06.409713', 'step': 14542, 'epoch': 2} {'type': 'loss', 'content': 0.1085057184100151, 'timestamp': '2025-09-30 22:30:06.427838', 'step': 14543, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.470637', 'step': 14543, 'epoch': 2} {'type': 'loss', 'content': 0.13424131274223328, 'timestamp': '2025-09-30 22:30:06.496744', 'step': 14544, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:06.542041', 'step': 14544, 'epoch': 2} {'type': 'loss', 'content': 0.14557364583015442, 'timestamp': '2025-09-30 22:30:06.558290', 'step': 14545, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.593085', 'step': 14545, 'epoch': 2} {'type': 'loss', 'content': 0.12566955387592316, 'timestamp': '2025-09-30 22:30:06.609053', 'step': 14546, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.643584', 'step': 14546, 'epoch': 2} {'type': 'loss', 'content': 0.13006927073001862, 'timestamp': '2025-09-30 22:30:06.647944', 'step': 14547, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.681910', 'step': 14547, 'epoch': 2} {'type': 'loss', 'content': 0.11566603183746338, 'timestamp': '2025-09-30 22:30:06.708698', 'step': 14548, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.752458', 'step': 14548, 'epoch': 2} {'type': 'loss', 'content': 0.1109207421541214, 'timestamp': '2025-09-30 22:30:06.755475', 'step': 14549, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:06.789090', 'step': 14549, 'epoch': 2} {'type': 'loss', 'content': 0.05235808342695236, 'timestamp': '2025-09-30 22:30:06.793513', 'step': 14550, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:06.825781', 'step': 14550, 'epoch': 2} {'type': 'loss', 'content': 0.0757039412856102, 'timestamp': '2025-09-30 22:30:06.843309', 'step': 14551, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:06.887893', 'step': 14551, 'epoch': 2} {'type': 'loss', 'content': 0.0433979332447052, 'timestamp': '2025-09-30 22:30:06.927001', 'step': 14552, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:06.960905', 'step': 14552, 'epoch': 2} {'type': 'loss', 'content': 0.07350073754787445, 'timestamp': '2025-09-30 22:30:06.964569', 'step': 14553, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:07.009636', 'step': 14553, 'epoch': 2} {'type': 'loss', 'content': 0.04840054735541344, 'timestamp': '2025-09-30 22:30:07.026293', 'step': 14554, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:07.059670', 'step': 14554, 'epoch': 2} {'type': 'loss', 'content': 0.17982766032218933, 'timestamp': '2025-09-30 22:30:07.064342', 'step': 14555, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:07.115303', 'step': 14555, 'epoch': 2} {'type': 'loss', 'content': 0.1192009449005127, 'timestamp': '2025-09-30 22:30:07.152585', 'step': 14556, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:07.195771', 'step': 14556, 'epoch': 2} {'type': 'loss', 'content': 0.09672429412603378, 'timestamp': '2025-09-30 22:30:07.199892', 'step': 14557, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:07.241695', 'step': 14557, 'epoch': 2} {'type': 'loss', 'content': 0.12925124168395996, 'timestamp': '2025-09-30 22:30:07.247277', 'step': 14558, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:07.281435', 'step': 14558, 'epoch': 2} {'type': 'loss', 'content': 0.06585131585597992, 'timestamp': '2025-09-30 22:30:07.299219', 'step': 14559, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:07.348593', 'step': 14559, 'epoch': 2} {'type': 'loss', 'content': 0.10154087096452713, 'timestamp': '2025-09-30 22:30:07.376241', 'step': 14560, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:07.409080', 'step': 14560, 'epoch': 2} {'type': 'loss', 'content': 0.12484350800514221, 'timestamp': '2025-09-30 22:30:07.430161', 'step': 14561, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:07.476828', 'step': 14561, 'epoch': 2} {'type': 'loss', 'content': 0.14136342704296112, 'timestamp': '2025-09-30 22:30:07.482011', 'step': 14562, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:07.514536', 'step': 14562, 'epoch': 2} {'type': 'loss', 'content': 0.08630428463220596, 'timestamp': '2025-09-30 22:30:07.520756', 'step': 14563, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:07.554708', 'step': 14563, 'epoch': 2} {'type': 'loss', 'content': 0.19463087618350983, 'timestamp': '2025-09-30 22:30:07.582475', 'step': 14564, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:07.614979', 'step': 14564, 'epoch': 2} {'type': 'loss', 'content': 0.07906268537044525, 'timestamp': '2025-09-30 22:30:07.620287', 'step': 14565, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:07.666844', 'step': 14565, 'epoch': 2} {'type': 'loss', 'content': 0.11566723883152008, 'timestamp': '2025-09-30 22:30:07.684801', 'step': 14566, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:07.716318', 'step': 14566, 'epoch': 2} {'type': 'loss', 'content': 0.059297043830156326, 'timestamp': '2025-09-30 22:30:07.723511', 'step': 14567, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:07.757502', 'step': 14567, 'epoch': 2} {'type': 'loss', 'content': 0.09018994867801666, 'timestamp': '2025-09-30 22:30:07.797248', 'step': 14568, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:07.844078', 'step': 14568, 'epoch': 2} {'type': 'loss', 'content': 0.03229238837957382, 'timestamp': '2025-09-30 22:30:07.850169', 'step': 14569, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:07.884660', 'step': 14569, 'epoch': 2} {'type': 'loss', 'content': 0.09907082468271255, 'timestamp': '2025-09-30 22:30:07.889689', 'step': 14570, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:07.934375', 'step': 14570, 'epoch': 2} {'type': 'loss', 'content': 0.09736865013837814, 'timestamp': '2025-09-30 22:30:07.940127', 'step': 14571, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:07.977736', 'step': 14571, 'epoch': 2} {'type': 'loss', 'content': 0.17052815854549408, 'timestamp': '2025-09-30 22:30:08.002824', 'step': 14572, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:08.036107', 'step': 14572, 'epoch': 2} {'type': 'loss', 'content': 0.0943465456366539, 'timestamp': '2025-09-30 22:30:08.054138', 'step': 14573, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:08.103166', 'step': 14573, 'epoch': 2} {'type': 'loss', 'content': 0.05860138311982155, 'timestamp': '2025-09-30 22:30:08.109226', 'step': 14574, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:08.142195', 'step': 14574, 'epoch': 2} {'type': 'loss', 'content': 0.11433014273643494, 'timestamp': '2025-09-30 22:30:08.162846', 'step': 14575, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:08.209103', 'step': 14575, 'epoch': 2} {'type': 'loss', 'content': 0.049110643565654755, 'timestamp': '2025-09-30 22:30:08.249123', 'step': 14576, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:08.292782', 'step': 14576, 'epoch': 2} {'type': 'loss', 'content': 0.1308654099702835, 'timestamp': '2025-09-30 22:30:08.298842', 'step': 14577, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:08.332317', 'step': 14577, 'epoch': 2} {'type': 'loss', 'content': 0.21932664513587952, 'timestamp': '2025-09-30 22:30:08.337655', 'step': 14578, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:08.371842', 'step': 14578, 'epoch': 2} {'type': 'loss', 'content': 0.033965032547712326, 'timestamp': '2025-09-30 22:30:08.388805', 'step': 14579, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:08.433492', 'step': 14579, 'epoch': 2} {'type': 'loss', 'content': 0.08234193176031113, 'timestamp': '2025-09-30 22:30:08.460437', 'step': 14580, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:08.494915', 'step': 14580, 'epoch': 2} {'type': 'loss', 'content': 0.13964861631393433, 'timestamp': '2025-09-30 22:30:08.500531', 'step': 14581, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:08.547719', 'step': 14581, 'epoch': 2} {'type': 'loss', 'content': 0.050280388444662094, 'timestamp': '2025-09-30 22:30:08.552794', 'step': 14582, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:08.597057', 'step': 14582, 'epoch': 2} {'type': 'loss', 'content': 0.08804360032081604, 'timestamp': '2025-09-30 22:30:08.613911', 'step': 14583, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:08.662633', 'step': 14583, 'epoch': 2} {'type': 'loss', 'content': 0.06098225712776184, 'timestamp': '2025-09-30 22:30:08.691156', 'step': 14584, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:08.724466', 'step': 14584, 'epoch': 2} {'type': 'loss', 'content': 0.05339236930012703, 'timestamp': '2025-09-30 22:30:08.730862', 'step': 14585, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:08.780660', 'step': 14585, 'epoch': 2} {'type': 'loss', 'content': 0.07701646536588669, 'timestamp': '2025-09-30 22:30:08.786875', 'step': 14586, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:08.828800', 'step': 14586, 'epoch': 2} {'type': 'loss', 'content': 0.05928856506943703, 'timestamp': '2025-09-30 22:30:08.832515', 'step': 14587, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:08.869503', 'step': 14587, 'epoch': 2} {'type': 'loss', 'content': 0.16337858140468597, 'timestamp': '2025-09-30 22:30:08.896704', 'step': 14588, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:08.931285', 'step': 14588, 'epoch': 2} {'type': 'loss', 'content': 0.08378516137599945, 'timestamp': '2025-09-30 22:30:08.934680', 'step': 14589, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:08.981674', 'step': 14589, 'epoch': 2} {'type': 'loss', 'content': 0.1597709059715271, 'timestamp': '2025-09-30 22:30:08.998596', 'step': 14590, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:09.045443', 'step': 14590, 'epoch': 2} {'type': 'loss', 'content': 0.08129771798849106, 'timestamp': '2025-09-30 22:30:09.049725', 'step': 14591, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:09.082372', 'step': 14591, 'epoch': 2} {'type': 'loss', 'content': 0.08293993026018143, 'timestamp': '2025-09-30 22:30:09.107804', 'step': 14592, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.142418', 'step': 14592, 'epoch': 2} {'type': 'loss', 'content': 0.10537385940551758, 'timestamp': '2025-09-30 22:30:09.146534', 'step': 14593, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:09.178666', 'step': 14593, 'epoch': 2} {'type': 'loss', 'content': 0.05692193657159805, 'timestamp': '2025-09-30 22:30:09.183319', 'step': 14594, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:09.214666', 'step': 14594, 'epoch': 2} {'type': 'loss', 'content': 0.08326079696416855, 'timestamp': '2025-09-30 22:30:09.218293', 'step': 14595, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.253351', 'step': 14595, 'epoch': 2} {'type': 'loss', 'content': 0.11080242693424225, 'timestamp': '2025-09-30 22:30:09.287711', 'step': 14596, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.320690', 'step': 14596, 'epoch': 2} {'type': 'loss', 'content': 0.048556990921497345, 'timestamp': '2025-09-30 22:30:09.325663', 'step': 14597, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:09.358061', 'step': 14597, 'epoch': 2} {'type': 'loss', 'content': 0.055041879415512085, 'timestamp': '2025-09-30 22:30:09.362233', 'step': 14598, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:09.401324', 'step': 14598, 'epoch': 2} {'type': 'loss', 'content': 0.04792946204543114, 'timestamp': '2025-09-30 22:30:09.419117', 'step': 14599, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.464195', 'step': 14599, 'epoch': 2} {'type': 'loss', 'content': 0.058655571192502975, 'timestamp': '2025-09-30 22:30:09.489020', 'step': 14600, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:09.524542', 'step': 14600, 'epoch': 2} {'type': 'loss', 'content': 0.07390408217906952, 'timestamp': '2025-09-30 22:30:09.541093', 'step': 14601, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.572758', 'step': 14601, 'epoch': 2} {'type': 'loss', 'content': 0.0683726817369461, 'timestamp': '2025-09-30 22:30:09.576279', 'step': 14602, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:09.614286', 'step': 14602, 'epoch': 2} {'type': 'loss', 'content': 0.09622029960155487, 'timestamp': '2025-09-30 22:30:09.632471', 'step': 14603, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:09.678120', 'step': 14603, 'epoch': 2} {'type': 'loss', 'content': 0.14079973101615906, 'timestamp': '2025-09-30 22:30:09.705364', 'step': 14604, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:09.739553', 'step': 14604, 'epoch': 2} {'type': 'loss', 'content': 0.036344461143016815, 'timestamp': '2025-09-30 22:30:09.743052', 'step': 14605, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:09.775772', 'step': 14605, 'epoch': 2} {'type': 'loss', 'content': 0.12188153713941574, 'timestamp': '2025-09-30 22:30:09.787296', 'step': 14606, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:09.818763', 'step': 14606, 'epoch': 2} {'type': 'loss', 'content': 0.060401953756809235, 'timestamp': '2025-09-30 22:30:09.823254', 'step': 14607, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.867431', 'step': 14607, 'epoch': 2} {'type': 'loss', 'content': 0.06610547751188278, 'timestamp': '2025-09-30 22:30:09.892206', 'step': 14608, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:09.925680', 'step': 14608, 'epoch': 2} {'type': 'loss', 'content': 0.12722446024417877, 'timestamp': '2025-09-30 22:30:09.930097', 'step': 14609, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:09.979613', 'step': 14609, 'epoch': 2} {'type': 'loss', 'content': 0.090519018471241, 'timestamp': '2025-09-30 22:30:09.982053', 'step': 14610, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:10.030035', 'step': 14610, 'epoch': 2} {'type': 'loss', 'content': 0.04590146988630295, 'timestamp': '2025-09-30 22:30:10.034126', 'step': 14611, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:10.067546', 'step': 14611, 'epoch': 2} {'type': 'loss', 'content': 0.1446932554244995, 'timestamp': '2025-09-30 22:30:10.094634', 'step': 14612, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:10.129360', 'step': 14612, 'epoch': 2} {'type': 'loss', 'content': 0.07221343368291855, 'timestamp': '2025-09-30 22:30:10.148526', 'step': 14613, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:10.181107', 'step': 14613, 'epoch': 2} {'type': 'loss', 'content': 0.08213835209608078, 'timestamp': '2025-09-30 22:30:10.184763', 'step': 14614, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:10.218098', 'step': 14614, 'epoch': 2} {'type': 'loss', 'content': 0.06833433359861374, 'timestamp': '2025-09-30 22:30:10.222017', 'step': 14615, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:10.255925', 'step': 14615, 'epoch': 2} {'type': 'loss', 'content': 0.08526238799095154, 'timestamp': '2025-09-30 22:30:10.283839', 'step': 14616, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:10.329696', 'step': 14616, 'epoch': 2} {'type': 'loss', 'content': 0.07381290942430496, 'timestamp': '2025-09-30 22:30:10.346525', 'step': 14617, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:30:10.381726', 'step': 14617, 'epoch': 2} {'type': 'loss', 'content': 0.09957204759120941, 'timestamp': '2025-09-30 22:30:10.398810', 'step': 14618, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:10.448523', 'step': 14618, 'epoch': 2} {'type': 'loss', 'content': 0.16626420617103577, 'timestamp': '2025-09-30 22:30:10.452865', 'step': 14619, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:10.501398', 'step': 14619, 'epoch': 2} {'type': 'loss', 'content': 0.040972933173179626, 'timestamp': '2025-09-30 22:30:10.528202', 'step': 14620, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:10.562546', 'step': 14620, 'epoch': 2} {'type': 'loss', 'content': 0.1986270546913147, 'timestamp': '2025-09-30 22:30:10.567643', 'step': 14621, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:10.602162', 'step': 14621, 'epoch': 2} {'type': 'loss', 'content': 0.11293284595012665, 'timestamp': '2025-09-30 22:30:10.607553', 'step': 14622, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:10.654992', 'step': 14622, 'epoch': 2} {'type': 'loss', 'content': 0.1090458333492279, 'timestamp': '2025-09-30 22:30:10.660121', 'step': 14623, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:10.692409', 'step': 14623, 'epoch': 2} {'type': 'loss', 'content': 0.09895727038383484, 'timestamp': '2025-09-30 22:30:10.731186', 'step': 14624, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:10.764677', 'step': 14624, 'epoch': 2} {'type': 'loss', 'content': 0.11296014487743378, 'timestamp': '2025-09-30 22:30:10.781156', 'step': 14625, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:10.816222', 'step': 14625, 'epoch': 2} {'type': 'loss', 'content': 0.06845270097255707, 'timestamp': '2025-09-30 22:30:10.822441', 'step': 14626, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:10.865540', 'step': 14626, 'epoch': 2} {'type': 'loss', 'content': 0.160692036151886, 'timestamp': '2025-09-30 22:30:10.871897', 'step': 14627, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:10.910689', 'step': 14627, 'epoch': 2} {'type': 'loss', 'content': 0.06599786132574081, 'timestamp': '2025-09-30 22:30:10.937094', 'step': 14628, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:10.979217', 'step': 14628, 'epoch': 2} {'type': 'loss', 'content': 0.1185356080532074, 'timestamp': '2025-09-30 22:30:10.984881', 'step': 14629, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:11.019233', 'step': 14629, 'epoch': 2} {'type': 'loss', 'content': 0.08803129196166992, 'timestamp': '2025-09-30 22:30:11.036162', 'step': 14630, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.068614', 'step': 14630, 'epoch': 2} {'type': 'loss', 'content': 0.09891887754201889, 'timestamp': '2025-09-30 22:30:11.072765', 'step': 14631, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.105457', 'step': 14631, 'epoch': 2} {'type': 'loss', 'content': 0.11768902838230133, 'timestamp': '2025-09-30 22:30:11.146358', 'step': 14632, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.179813', 'step': 14632, 'epoch': 2} {'type': 'loss', 'content': 0.08460237830877304, 'timestamp': '2025-09-30 22:30:11.184798', 'step': 14633, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:11.221273', 'step': 14633, 'epoch': 2} {'type': 'loss', 'content': 0.07073739171028137, 'timestamp': '2025-09-30 22:30:11.236840', 'step': 14634, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.273633', 'step': 14634, 'epoch': 2} {'type': 'loss', 'content': 0.08193755894899368, 'timestamp': '2025-09-30 22:30:11.279514', 'step': 14635, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.315267', 'step': 14635, 'epoch': 2} {'type': 'loss', 'content': 0.10690464079380035, 'timestamp': '2025-09-30 22:30:11.342380', 'step': 14636, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.375165', 'step': 14636, 'epoch': 2} {'type': 'loss', 'content': 0.1716669350862503, 'timestamp': '2025-09-30 22:30:11.378407', 'step': 14637, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.423915', 'step': 14637, 'epoch': 2} {'type': 'loss', 'content': 0.0572115033864975, 'timestamp': '2025-09-30 22:30:11.428649', 'step': 14638, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.460766', 'step': 14638, 'epoch': 2} {'type': 'loss', 'content': 0.055706411600112915, 'timestamp': '2025-09-30 22:30:11.466077', 'step': 14639, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.510496', 'step': 14639, 'epoch': 2} {'type': 'loss', 'content': 0.10180976986885071, 'timestamp': '2025-09-30 22:30:11.536718', 'step': 14640, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.575382', 'step': 14640, 'epoch': 2} {'type': 'loss', 'content': 0.16223487257957458, 'timestamp': '2025-09-30 22:30:11.579979', 'step': 14641, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:11.625524', 'step': 14641, 'epoch': 2} {'type': 'loss', 'content': 0.11886172741651535, 'timestamp': '2025-09-30 22:30:11.641524', 'step': 14642, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.683609', 'step': 14642, 'epoch': 2} {'type': 'loss', 'content': 0.072529636323452, 'timestamp': '2025-09-30 22:30:11.687141', 'step': 14643, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:11.721405', 'step': 14643, 'epoch': 2} {'type': 'loss', 'content': 0.1607515960931778, 'timestamp': '2025-09-30 22:30:11.748854', 'step': 14644, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.785170', 'step': 14644, 'epoch': 2} {'type': 'loss', 'content': 0.1348295956850052, 'timestamp': '2025-09-30 22:30:11.789371', 'step': 14645, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.827931', 'step': 14645, 'epoch': 2} {'type': 'loss', 'content': 0.06631841510534286, 'timestamp': '2025-09-30 22:30:11.834499', 'step': 14646, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.867719', 'step': 14646, 'epoch': 2} {'type': 'loss', 'content': 0.15380598604679108, 'timestamp': '2025-09-30 22:30:11.875040', 'step': 14647, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:11.920554', 'step': 14647, 'epoch': 2} {'type': 'loss', 'content': 0.08968118578195572, 'timestamp': '2025-09-30 22:30:11.947058', 'step': 14648, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:11.984651', 'step': 14648, 'epoch': 2} {'type': 'loss', 'content': 0.12647663056850433, 'timestamp': '2025-09-30 22:30:11.989834', 'step': 14649, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:12.037383', 'step': 14649, 'epoch': 2} {'type': 'loss', 'content': 0.08912638574838638, 'timestamp': '2025-09-30 22:30:12.054199', 'step': 14650, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:12.099070', 'step': 14650, 'epoch': 2} {'type': 'loss', 'content': 0.12083695828914642, 'timestamp': '2025-09-30 22:30:12.115668', 'step': 14651, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:12.147557', 'step': 14651, 'epoch': 2} {'type': 'loss', 'content': 0.15560826659202576, 'timestamp': '2025-09-30 22:30:12.175903', 'step': 14652, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:12.206981', 'step': 14652, 'epoch': 2} {'type': 'loss', 'content': 0.17992374300956726, 'timestamp': '2025-09-30 22:30:12.213025', 'step': 14653, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:12.243977', 'step': 14653, 'epoch': 2} {'type': 'loss', 'content': 0.12004450708627701, 'timestamp': '2025-09-30 22:30:12.250376', 'step': 14654, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:12.304675', 'step': 14654, 'epoch': 2} {'type': 'loss', 'content': 0.1300342082977295, 'timestamp': '2025-09-30 22:30:12.310824', 'step': 14655, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:12.344589', 'step': 14655, 'epoch': 2} {'type': 'loss', 'content': 0.11189945787191391, 'timestamp': '2025-09-30 22:30:12.371382', 'step': 14656, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:12.403914', 'step': 14656, 'epoch': 2} {'type': 'loss', 'content': 0.11418470740318298, 'timestamp': '2025-09-30 22:30:12.408485', 'step': 14657, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:12.443877', 'step': 14657, 'epoch': 2} {'type': 'loss', 'content': 0.04783114418387413, 'timestamp': '2025-09-30 22:30:12.459679', 'step': 14658, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:12.504268', 'step': 14658, 'epoch': 2} {'type': 'loss', 'content': 0.06830574572086334, 'timestamp': '2025-09-30 22:30:12.507934', 'step': 14659, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:12.551713', 'step': 14659, 'epoch': 2} {'type': 'loss', 'content': 0.09536588937044144, 'timestamp': '2025-09-30 22:30:12.576622', 'step': 14660, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:12.608028', 'step': 14660, 'epoch': 2} {'type': 'loss', 'content': 0.050725847482681274, 'timestamp': '2025-09-30 22:30:12.612249', 'step': 14661, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:12.643563', 'step': 14661, 'epoch': 2} {'type': 'loss', 'content': 0.11183059960603714, 'timestamp': '2025-09-30 22:30:12.647933', 'step': 14662, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:12.692649', 'step': 14662, 'epoch': 2} {'type': 'loss', 'content': 0.18493592739105225, 'timestamp': '2025-09-30 22:30:12.698723', 'step': 14663, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:12.741456', 'step': 14663, 'epoch': 2} {'type': 'loss', 'content': 0.1153428852558136, 'timestamp': '2025-09-30 22:30:12.766519', 'step': 14664, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:12.799059', 'step': 14664, 'epoch': 2} {'type': 'loss', 'content': 0.09381368011236191, 'timestamp': '2025-09-30 22:30:12.814488', 'step': 14665, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:12.847957', 'step': 14665, 'epoch': 2} {'type': 'loss', 'content': 0.07799095660448074, 'timestamp': '2025-09-30 22:30:12.852991', 'step': 14666, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:12.896485', 'step': 14666, 'epoch': 2} {'type': 'loss', 'content': 0.22838400304317474, 'timestamp': '2025-09-30 22:30:12.900669', 'step': 14667, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:12.943681', 'step': 14667, 'epoch': 2} {'type': 'loss', 'content': 0.13040144741535187, 'timestamp': '2025-09-30 22:30:12.970827', 'step': 14668, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:13.004076', 'step': 14668, 'epoch': 2} {'type': 'loss', 'content': 0.07346311211585999, 'timestamp': '2025-09-30 22:30:13.008607', 'step': 14669, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:13.041485', 'step': 14669, 'epoch': 2} {'type': 'loss', 'content': 0.08102696388959885, 'timestamp': '2025-09-30 22:30:13.055346', 'step': 14670, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:13.088088', 'step': 14670, 'epoch': 2} {'type': 'loss', 'content': 0.0441831573843956, 'timestamp': '2025-09-30 22:30:13.093780', 'step': 14671, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:13.126449', 'step': 14671, 'epoch': 2} {'type': 'loss', 'content': 0.07072024792432785, 'timestamp': '2025-09-30 22:30:13.152471', 'step': 14672, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:13.191632', 'step': 14672, 'epoch': 2} {'type': 'loss', 'content': 0.08187659829854965, 'timestamp': '2025-09-30 22:30:13.196262', 'step': 14673, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:13.230397', 'step': 14673, 'epoch': 2} {'type': 'loss', 'content': 0.16878396272659302, 'timestamp': '2025-09-30 22:30:13.246435', 'step': 14674, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:13.290622', 'step': 14674, 'epoch': 2} {'type': 'loss', 'content': 0.06465080380439758, 'timestamp': '2025-09-30 22:30:13.307096', 'step': 14675, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:13.350420', 'step': 14675, 'epoch': 2} {'type': 'loss', 'content': 0.08408114314079285, 'timestamp': '2025-09-30 22:30:13.375478', 'step': 14676, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:13.408167', 'step': 14676, 'epoch': 2} {'type': 'loss', 'content': 0.11530093103647232, 'timestamp': '2025-09-30 22:30:13.413486', 'step': 14677, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:13.446617', 'step': 14677, 'epoch': 2} {'type': 'loss', 'content': 0.06234906613826752, 'timestamp': '2025-09-30 22:30:13.451339', 'step': 14678, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:13.484420', 'step': 14678, 'epoch': 2} {'type': 'loss', 'content': 0.06045766547322273, 'timestamp': '2025-09-30 22:30:13.489626', 'step': 14679, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:13.524457', 'step': 14679, 'epoch': 2} {'type': 'loss', 'content': 0.11847341805696487, 'timestamp': '2025-09-30 22:30:13.550752', 'step': 14680, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:13.583069', 'step': 14680, 'epoch': 2} {'type': 'loss', 'content': 0.093895822763443, 'timestamp': '2025-09-30 22:30:13.599106', 'step': 14681, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:13.633656', 'step': 14681, 'epoch': 2} {'type': 'loss', 'content': 0.08354318886995316, 'timestamp': '2025-09-30 22:30:13.651946', 'step': 14682, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:13.685916', 'step': 14682, 'epoch': 2} {'type': 'loss', 'content': 0.1038183867931366, 'timestamp': '2025-09-30 22:30:13.700561', 'step': 14683, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:13.735281', 'step': 14683, 'epoch': 2} {'type': 'loss', 'content': 0.09355625510215759, 'timestamp': '2025-09-30 22:30:13.762735', 'step': 14684, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:13.796744', 'step': 14684, 'epoch': 2} {'type': 'loss', 'content': 0.11780792474746704, 'timestamp': '2025-09-30 22:30:13.812657', 'step': 14685, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:13.845071', 'step': 14685, 'epoch': 2} {'type': 'loss', 'content': 0.11775744706392288, 'timestamp': '2025-09-30 22:30:13.860941', 'step': 14686, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:13.893730', 'step': 14686, 'epoch': 2} {'type': 'loss', 'content': 0.06319146603345871, 'timestamp': '2025-09-30 22:30:13.898408', 'step': 14687, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:13.932322', 'step': 14687, 'epoch': 2} {'type': 'loss', 'content': 0.054480087012052536, 'timestamp': '2025-09-30 22:30:13.956714', 'step': 14688, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:13.995599', 'step': 14688, 'epoch': 2} {'type': 'loss', 'content': 0.09614372998476028, 'timestamp': '2025-09-30 22:30:14.000423', 'step': 14689, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:14.034707', 'step': 14689, 'epoch': 2} {'type': 'loss', 'content': 0.08911016583442688, 'timestamp': '2025-09-30 22:30:14.051530', 'step': 14690, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.085825', 'step': 14690, 'epoch': 2} {'type': 'loss', 'content': 0.10150440037250519, 'timestamp': '2025-09-30 22:30:14.091319', 'step': 14691, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.125209', 'step': 14691, 'epoch': 2} {'type': 'loss', 'content': 0.0769452154636383, 'timestamp': '2025-09-30 22:30:14.152002', 'step': 14692, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.184808', 'step': 14692, 'epoch': 2} {'type': 'loss', 'content': 0.03904057294130325, 'timestamp': '2025-09-30 22:30:14.188495', 'step': 14693, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.220729', 'step': 14693, 'epoch': 2} {'type': 'loss', 'content': 0.06626414507627487, 'timestamp': '2025-09-30 22:30:14.224250', 'step': 14694, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.263661', 'step': 14694, 'epoch': 2} {'type': 'loss', 'content': 0.018221991136670113, 'timestamp': '2025-09-30 22:30:14.266567', 'step': 14695, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:14.298156', 'step': 14695, 'epoch': 2} {'type': 'loss', 'content': 0.039971522986888885, 'timestamp': '2025-09-30 22:30:14.326854', 'step': 14696, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.359262', 'step': 14696, 'epoch': 2} {'type': 'loss', 'content': 0.08313159644603729, 'timestamp': '2025-09-30 22:30:14.363711', 'step': 14697, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.395929', 'step': 14697, 'epoch': 2} {'type': 'loss', 'content': 0.04225798323750496, 'timestamp': '2025-09-30 22:30:14.413914', 'step': 14698, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:14.458929', 'step': 14698, 'epoch': 2} {'type': 'loss', 'content': 0.10374011844396591, 'timestamp': '2025-09-30 22:30:14.478514', 'step': 14699, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:14.521963', 'step': 14699, 'epoch': 2} {'type': 'loss', 'content': 0.13927406072616577, 'timestamp': '2025-09-30 22:30:14.547854', 'step': 14700, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:14.579926', 'step': 14700, 'epoch': 2} {'type': 'loss', 'content': 0.08492378890514374, 'timestamp': '2025-09-30 22:30:14.583699', 'step': 14701, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:14.615909', 'step': 14701, 'epoch': 2} {'type': 'loss', 'content': 0.03503449261188507, 'timestamp': '2025-09-30 22:30:14.619569', 'step': 14702, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.654385', 'step': 14702, 'epoch': 2} {'type': 'loss', 'content': 0.16058367490768433, 'timestamp': '2025-09-30 22:30:14.658471', 'step': 14703, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:14.701386', 'step': 14703, 'epoch': 2} {'type': 'loss', 'content': 0.06979308277368546, 'timestamp': '2025-09-30 22:30:14.728325', 'step': 14704, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:14.763298', 'step': 14704, 'epoch': 2} {'type': 'loss', 'content': 0.13749901950359344, 'timestamp': '2025-09-30 22:30:14.767430', 'step': 14705, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:14.800458', 'step': 14705, 'epoch': 2} {'type': 'loss', 'content': 0.10348810255527496, 'timestamp': '2025-09-30 22:30:14.806397', 'step': 14706, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:14.846962', 'step': 14706, 'epoch': 2} {'type': 'loss', 'content': 0.05032361298799515, 'timestamp': '2025-09-30 22:30:14.852482', 'step': 14707, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:14.886261', 'step': 14707, 'epoch': 2} {'type': 'loss', 'content': 0.14224998652935028, 'timestamp': '2025-09-30 22:30:14.921132', 'step': 14708, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:14.953989', 'step': 14708, 'epoch': 2} {'type': 'loss', 'content': 0.08263806253671646, 'timestamp': '2025-09-30 22:30:14.959236', 'step': 14709, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.009531', 'step': 14709, 'epoch': 2} {'type': 'loss', 'content': 0.07283997535705566, 'timestamp': '2025-09-30 22:30:15.021022', 'step': 14710, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:15.056446', 'step': 14710, 'epoch': 2} {'type': 'loss', 'content': 0.03899386525154114, 'timestamp': '2025-09-30 22:30:15.060688', 'step': 14711, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:15.103854', 'step': 14711, 'epoch': 2} {'type': 'loss', 'content': 0.10146123170852661, 'timestamp': '2025-09-30 22:30:15.132449', 'step': 14712, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.165103', 'step': 14712, 'epoch': 2} {'type': 'loss', 'content': 0.15180642902851105, 'timestamp': '2025-09-30 22:30:15.183627', 'step': 14713, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:15.220416', 'step': 14713, 'epoch': 2} {'type': 'loss', 'content': 0.10934225469827652, 'timestamp': '2025-09-30 22:30:15.237830', 'step': 14714, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.269605', 'step': 14714, 'epoch': 2} {'type': 'loss', 'content': 0.07853447645902634, 'timestamp': '2025-09-30 22:30:15.273404', 'step': 14715, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:15.306303', 'step': 14715, 'epoch': 2} {'type': 'loss', 'content': 0.14236527681350708, 'timestamp': '2025-09-30 22:30:15.331530', 'step': 14716, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:15.363875', 'step': 14716, 'epoch': 2} {'type': 'loss', 'content': 0.07100905478000641, 'timestamp': '2025-09-30 22:30:15.368237', 'step': 14717, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:15.400268', 'step': 14717, 'epoch': 2} {'type': 'loss', 'content': 0.10130815953016281, 'timestamp': '2025-09-30 22:30:15.404793', 'step': 14718, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:15.437956', 'step': 14718, 'epoch': 2} {'type': 'loss', 'content': 0.1428275853395462, 'timestamp': '2025-09-30 22:30:15.444161', 'step': 14719, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.478752', 'step': 14719, 'epoch': 2} {'type': 'loss', 'content': 0.14559270441532135, 'timestamp': '2025-09-30 22:30:15.506372', 'step': 14720, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:15.540460', 'step': 14720, 'epoch': 2} {'type': 'loss', 'content': 0.17622293531894684, 'timestamp': '2025-09-30 22:30:15.547301', 'step': 14721, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:15.584499', 'step': 14721, 'epoch': 2} {'type': 'loss', 'content': 0.0758252888917923, 'timestamp': '2025-09-30 22:30:15.588251', 'step': 14722, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.621638', 'step': 14722, 'epoch': 2} {'type': 'loss', 'content': 0.0628376230597496, 'timestamp': '2025-09-30 22:30:15.624689', 'step': 14723, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.660032', 'step': 14723, 'epoch': 2} {'type': 'loss', 'content': 0.06611624360084534, 'timestamp': '2025-09-30 22:30:15.697422', 'step': 14724, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.728573', 'step': 14724, 'epoch': 2} {'type': 'loss', 'content': 0.1727694272994995, 'timestamp': '2025-09-30 22:30:15.747954', 'step': 14725, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:15.790267', 'step': 14725, 'epoch': 2} {'type': 'loss', 'content': 0.10258255153894424, 'timestamp': '2025-09-30 22:30:15.796998', 'step': 14726, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.831261', 'step': 14726, 'epoch': 2} {'type': 'loss', 'content': 0.027191920205950737, 'timestamp': '2025-09-30 22:30:15.835887', 'step': 14727, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.868430', 'step': 14727, 'epoch': 2} {'type': 'loss', 'content': 0.14144361019134521, 'timestamp': '2025-09-30 22:30:15.894424', 'step': 14728, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:15.928740', 'step': 14728, 'epoch': 2} {'type': 'loss', 'content': 0.10970692336559296, 'timestamp': '2025-09-30 22:30:15.934411', 'step': 14729, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:15.969522', 'step': 14729, 'epoch': 2} {'type': 'loss', 'content': 0.08728118240833282, 'timestamp': '2025-09-30 22:30:15.985370', 'step': 14730, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:16.030289', 'step': 14730, 'epoch': 2} {'type': 'loss', 'content': 0.13182255625724792, 'timestamp': '2025-09-30 22:30:16.047376', 'step': 14731, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:16.081795', 'step': 14731, 'epoch': 2} {'type': 'loss', 'content': 0.1280948370695114, 'timestamp': '2025-09-30 22:30:16.118135', 'step': 14732, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:16.152116', 'step': 14732, 'epoch': 2} {'type': 'loss', 'content': 0.1305554062128067, 'timestamp': '2025-09-30 22:30:16.155352', 'step': 14733, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:16.199756', 'step': 14733, 'epoch': 2} {'type': 'loss', 'content': 0.07981331646442413, 'timestamp': '2025-09-30 22:30:16.216775', 'step': 14734, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:16.261146', 'step': 14734, 'epoch': 2} {'type': 'loss', 'content': 0.05453644320368767, 'timestamp': '2025-09-30 22:30:16.279807', 'step': 14735, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:16.323726', 'step': 14735, 'epoch': 2} {'type': 'loss', 'content': 0.14816226065158844, 'timestamp': '2025-09-30 22:30:16.363930', 'step': 14736, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:16.407274', 'step': 14736, 'epoch': 2} {'type': 'loss', 'content': 0.0376955047249794, 'timestamp': '2025-09-30 22:30:16.412465', 'step': 14737, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:16.448193', 'step': 14737, 'epoch': 2} {'type': 'loss', 'content': 0.14286425709724426, 'timestamp': '2025-09-30 22:30:16.464786', 'step': 14738, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:16.504042', 'step': 14738, 'epoch': 2} {'type': 'loss', 'content': 0.051051847636699677, 'timestamp': '2025-09-30 22:30:16.508776', 'step': 14739, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:16.543416', 'step': 14739, 'epoch': 2} {'type': 'loss', 'content': 0.06540889292955399, 'timestamp': '2025-09-30 22:30:16.572282', 'step': 14740, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:16.606182', 'step': 14740, 'epoch': 2} {'type': 'loss', 'content': 0.19278329610824585, 'timestamp': '2025-09-30 22:30:16.626008', 'step': 14741, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:16.658606', 'step': 14741, 'epoch': 2} {'type': 'loss', 'content': 0.05737129598855972, 'timestamp': '2025-09-30 22:30:16.673572', 'step': 14742, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:16.719149', 'step': 14742, 'epoch': 2} {'type': 'loss', 'content': 0.07975177466869354, 'timestamp': '2025-09-30 22:30:16.727889', 'step': 14743, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:16.759535', 'step': 14743, 'epoch': 2} {'type': 'loss', 'content': 0.004767231177538633, 'timestamp': '2025-09-30 22:30:16.796376', 'step': 14744, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:16.838793', 'step': 14744, 'epoch': 2} {'type': 'loss', 'content': 0.031123608350753784, 'timestamp': '2025-09-30 22:30:16.842962', 'step': 14745, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:16.874540', 'step': 14745, 'epoch': 2} {'type': 'loss', 'content': 0.1174323633313179, 'timestamp': '2025-09-30 22:30:16.880857', 'step': 14746, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:16.912486', 'step': 14746, 'epoch': 2} {'type': 'loss', 'content': 0.0785009041428566, 'timestamp': '2025-09-30 22:30:16.917006', 'step': 14747, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:16.953330', 'step': 14747, 'epoch': 2} {'type': 'loss', 'content': 0.08794993162155151, 'timestamp': '2025-09-30 22:30:16.980598', 'step': 14748, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:17.024224', 'step': 14748, 'epoch': 2} {'type': 'loss', 'content': 0.05474277585744858, 'timestamp': '2025-09-30 22:30:17.039996', 'step': 14749, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.071858', 'step': 14749, 'epoch': 2} {'type': 'loss', 'content': 0.098300501704216, 'timestamp': '2025-09-30 22:30:17.074829', 'step': 14750, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:17.118680', 'step': 14750, 'epoch': 2} {'type': 'loss', 'content': 0.11602234095335007, 'timestamp': '2025-09-30 22:30:17.122817', 'step': 14751, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.154784', 'step': 14751, 'epoch': 2} {'type': 'loss', 'content': 0.09535104781389236, 'timestamp': '2025-09-30 22:30:17.188253', 'step': 14752, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.234710', 'step': 14752, 'epoch': 2} {'type': 'loss', 'content': 0.09641347080469131, 'timestamp': '2025-09-30 22:30:17.240658', 'step': 14753, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.273334', 'step': 14753, 'epoch': 2} {'type': 'loss', 'content': 0.07092523574829102, 'timestamp': '2025-09-30 22:30:17.290633', 'step': 14754, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.322438', 'step': 14754, 'epoch': 2} {'type': 'loss', 'content': 0.08840692788362503, 'timestamp': '2025-09-30 22:30:17.328389', 'step': 14755, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.360817', 'step': 14755, 'epoch': 2} {'type': 'loss', 'content': 0.10954716056585312, 'timestamp': '2025-09-30 22:30:17.388141', 'step': 14756, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:17.433217', 'step': 14756, 'epoch': 2} {'type': 'loss', 'content': 0.07834550738334656, 'timestamp': '2025-09-30 22:30:17.436972', 'step': 14757, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.470510', 'step': 14757, 'epoch': 2} {'type': 'loss', 'content': 0.1172865629196167, 'timestamp': '2025-09-30 22:30:17.474372', 'step': 14758, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.517219', 'step': 14758, 'epoch': 2} {'type': 'loss', 'content': 0.05922756344079971, 'timestamp': '2025-09-30 22:30:17.522374', 'step': 14759, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:17.566368', 'step': 14759, 'epoch': 2} {'type': 'loss', 'content': 0.045729901641607285, 'timestamp': '2025-09-30 22:30:17.598152', 'step': 14760, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:17.631005', 'step': 14760, 'epoch': 2} {'type': 'loss', 'content': 0.0986664667725563, 'timestamp': '2025-09-30 22:30:17.646454', 'step': 14761, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.690486', 'step': 14761, 'epoch': 2} {'type': 'loss', 'content': 0.11075625568628311, 'timestamp': '2025-09-30 22:30:17.696932', 'step': 14762, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:17.729344', 'step': 14762, 'epoch': 2} {'type': 'loss', 'content': 0.11462860554456711, 'timestamp': '2025-09-30 22:30:17.733507', 'step': 14763, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:17.771603', 'step': 14763, 'epoch': 2} {'type': 'loss', 'content': 0.050220586359500885, 'timestamp': '2025-09-30 22:30:17.796959', 'step': 14764, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:17.829421', 'step': 14764, 'epoch': 2} {'type': 'loss', 'content': 0.07650107145309448, 'timestamp': '2025-09-30 22:30:17.834248', 'step': 14765, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.877701', 'step': 14765, 'epoch': 2} {'type': 'loss', 'content': 0.10573115199804306, 'timestamp': '2025-09-30 22:30:17.881541', 'step': 14766, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:17.915880', 'step': 14766, 'epoch': 2} {'type': 'loss', 'content': 0.07344174385070801, 'timestamp': '2025-09-30 22:30:17.920399', 'step': 14767, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:17.953161', 'step': 14767, 'epoch': 2} {'type': 'loss', 'content': 0.09926541894674301, 'timestamp': '2025-09-30 22:30:17.980010', 'step': 14768, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.027567', 'step': 14768, 'epoch': 2} {'type': 'loss', 'content': 0.12042766064405441, 'timestamp': '2025-09-30 22:30:18.043315', 'step': 14769, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:18.077424', 'step': 14769, 'epoch': 2} {'type': 'loss', 'content': 0.09281953424215317, 'timestamp': '2025-09-30 22:30:18.082291', 'step': 14770, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:18.114745', 'step': 14770, 'epoch': 2} {'type': 'loss', 'content': 0.07868490368127823, 'timestamp': '2025-09-30 22:30:18.119004', 'step': 14771, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.150162', 'step': 14771, 'epoch': 2} {'type': 'loss', 'content': 0.09233222156763077, 'timestamp': '2025-09-30 22:30:18.174635', 'step': 14772, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:18.206324', 'step': 14772, 'epoch': 2} {'type': 'loss', 'content': 0.11633076518774033, 'timestamp': '2025-09-30 22:30:18.224209', 'step': 14773, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:18.271426', 'step': 14773, 'epoch': 2} {'type': 'loss', 'content': 0.11670608818531036, 'timestamp': '2025-09-30 22:30:18.288353', 'step': 14774, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:18.332286', 'step': 14774, 'epoch': 2} {'type': 'loss', 'content': 0.12809444963932037, 'timestamp': '2025-09-30 22:30:18.349111', 'step': 14775, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:18.382377', 'step': 14775, 'epoch': 2} {'type': 'loss', 'content': 0.07242324948310852, 'timestamp': '2025-09-30 22:30:18.408819', 'step': 14776, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:18.450384', 'step': 14776, 'epoch': 2} {'type': 'loss', 'content': 0.1209411472082138, 'timestamp': '2025-09-30 22:30:18.463416', 'step': 14777, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:18.496018', 'step': 14777, 'epoch': 2} {'type': 'loss', 'content': 0.12023808062076569, 'timestamp': '2025-09-30 22:30:18.507222', 'step': 14778, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.543161', 'step': 14778, 'epoch': 2} {'type': 'loss', 'content': 0.06199169158935547, 'timestamp': '2025-09-30 22:30:18.548502', 'step': 14779, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.585017', 'step': 14779, 'epoch': 2} {'type': 'loss', 'content': 0.116571806371212, 'timestamp': '2025-09-30 22:30:18.611236', 'step': 14780, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.644190', 'step': 14780, 'epoch': 2} {'type': 'loss', 'content': 0.2637651860713959, 'timestamp': '2025-09-30 22:30:18.649120', 'step': 14781, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:18.696381', 'step': 14781, 'epoch': 2} {'type': 'loss', 'content': 0.10079002380371094, 'timestamp': '2025-09-30 22:30:18.702431', 'step': 14782, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.735040', 'step': 14782, 'epoch': 2} {'type': 'loss', 'content': 0.07726174592971802, 'timestamp': '2025-09-30 22:30:18.739428', 'step': 14783, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:18.774197', 'step': 14783, 'epoch': 2} {'type': 'loss', 'content': 0.02265227399766445, 'timestamp': '2025-09-30 22:30:18.800941', 'step': 14784, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:18.835501', 'step': 14784, 'epoch': 2} {'type': 'loss', 'content': 0.08859649300575256, 'timestamp': '2025-09-30 22:30:18.840405', 'step': 14785, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:18.875050', 'step': 14785, 'epoch': 2} {'type': 'loss', 'content': 0.12053821980953217, 'timestamp': '2025-09-30 22:30:18.889078', 'step': 14786, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:18.923081', 'step': 14786, 'epoch': 2} {'type': 'loss', 'content': 0.09437940269708633, 'timestamp': '2025-09-30 22:30:18.927399', 'step': 14787, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:18.959406', 'step': 14787, 'epoch': 2} {'type': 'loss', 'content': 0.09291792660951614, 'timestamp': '2025-09-30 22:30:18.986088', 'step': 14788, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:19.029980', 'step': 14788, 'epoch': 2} {'type': 'loss', 'content': 0.08262445032596588, 'timestamp': '2025-09-30 22:30:19.047369', 'step': 14789, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.084812', 'step': 14789, 'epoch': 2} {'type': 'loss', 'content': 0.1815759688615799, 'timestamp': '2025-09-30 22:30:19.106372', 'step': 14790, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.155049', 'step': 14790, 'epoch': 2} {'type': 'loss', 'content': 0.07183988392353058, 'timestamp': '2025-09-30 22:30:19.161454', 'step': 14791, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.197289', 'step': 14791, 'epoch': 2} {'type': 'loss', 'content': 0.15177080035209656, 'timestamp': '2025-09-30 22:30:19.224914', 'step': 14792, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.257946', 'step': 14792, 'epoch': 2} {'type': 'loss', 'content': 0.09361498057842255, 'timestamp': '2025-09-30 22:30:19.263251', 'step': 14793, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.306172', 'step': 14793, 'epoch': 2} {'type': 'loss', 'content': 0.05857541412115097, 'timestamp': '2025-09-30 22:30:19.310337', 'step': 14794, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.343945', 'step': 14794, 'epoch': 2} {'type': 'loss', 'content': 0.07249518483877182, 'timestamp': '2025-09-30 22:30:19.356633', 'step': 14795, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.390327', 'step': 14795, 'epoch': 2} {'type': 'loss', 'content': 0.08790156990289688, 'timestamp': '2025-09-30 22:30:19.416079', 'step': 14796, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.461104', 'step': 14796, 'epoch': 2} {'type': 'loss', 'content': 0.13897722959518433, 'timestamp': '2025-09-30 22:30:19.464474', 'step': 14797, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.495783', 'step': 14797, 'epoch': 2} {'type': 'loss', 'content': 0.053463298827409744, 'timestamp': '2025-09-30 22:30:19.500906', 'step': 14798, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:19.533798', 'step': 14798, 'epoch': 2} {'type': 'loss', 'content': 0.12654449045658112, 'timestamp': '2025-09-30 22:30:19.540682', 'step': 14799, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.575771', 'step': 14799, 'epoch': 2} {'type': 'loss', 'content': 0.11713305860757828, 'timestamp': '2025-09-30 22:30:19.602840', 'step': 14800, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:19.643560', 'step': 14800, 'epoch': 2} {'type': 'loss', 'content': 0.1331397444009781, 'timestamp': '2025-09-30 22:30:19.646910', 'step': 14801, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.679275', 'step': 14801, 'epoch': 2} {'type': 'loss', 'content': 0.1440756469964981, 'timestamp': '2025-09-30 22:30:19.684234', 'step': 14802, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:19.717820', 'step': 14802, 'epoch': 2} {'type': 'loss', 'content': 0.07987015694379807, 'timestamp': '2025-09-30 22:30:19.723044', 'step': 14803, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:19.758189', 'step': 14803, 'epoch': 2} {'type': 'loss', 'content': 0.12779471278190613, 'timestamp': '2025-09-30 22:30:19.783237', 'step': 14804, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.816276', 'step': 14804, 'epoch': 2} {'type': 'loss', 'content': 0.04846711829304695, 'timestamp': '2025-09-30 22:30:19.820600', 'step': 14805, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.863450', 'step': 14805, 'epoch': 2} {'type': 'loss', 'content': 0.12326716631650925, 'timestamp': '2025-09-30 22:30:19.869082', 'step': 14806, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.901456', 'step': 14806, 'epoch': 2} {'type': 'loss', 'content': 0.05940733104944229, 'timestamp': '2025-09-30 22:30:19.906057', 'step': 14807, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:19.939811', 'step': 14807, 'epoch': 2} {'type': 'loss', 'content': 0.0888429507613182, 'timestamp': '2025-09-30 22:30:19.966974', 'step': 14808, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:19.997883', 'step': 14808, 'epoch': 2} {'type': 'loss', 'content': 0.05414992943406105, 'timestamp': '2025-09-30 22:30:20.004124', 'step': 14809, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:20.037759', 'step': 14809, 'epoch': 2} {'type': 'loss', 'content': 0.0694417655467987, 'timestamp': '2025-09-30 22:30:20.041890', 'step': 14810, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:20.073483', 'step': 14810, 'epoch': 2} {'type': 'loss', 'content': 0.08351756632328033, 'timestamp': '2025-09-30 22:30:20.079248', 'step': 14811, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.114839', 'step': 14811, 'epoch': 2} {'type': 'loss', 'content': 0.12603121995925903, 'timestamp': '2025-09-30 22:30:20.155125', 'step': 14812, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:20.189338', 'step': 14812, 'epoch': 2} {'type': 'loss', 'content': 0.10589991509914398, 'timestamp': '2025-09-30 22:30:20.196692', 'step': 14813, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.240649', 'step': 14813, 'epoch': 2} {'type': 'loss', 'content': 0.07751563936471939, 'timestamp': '2025-09-30 22:30:20.247053', 'step': 14814, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.285268', 'step': 14814, 'epoch': 2} {'type': 'loss', 'content': 0.1404518336057663, 'timestamp': '2025-09-30 22:30:20.290542', 'step': 14815, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:20.324159', 'step': 14815, 'epoch': 2} {'type': 'loss', 'content': 0.0817946195602417, 'timestamp': '2025-09-30 22:30:20.362303', 'step': 14816, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:20.406511', 'step': 14816, 'epoch': 2} {'type': 'loss', 'content': 0.17740498483181, 'timestamp': '2025-09-30 22:30:20.412124', 'step': 14817, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:20.445656', 'step': 14817, 'epoch': 2} {'type': 'loss', 'content': 0.05456998199224472, 'timestamp': '2025-09-30 22:30:20.453377', 'step': 14818, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.487494', 'step': 14818, 'epoch': 2} {'type': 'loss', 'content': 0.12861686944961548, 'timestamp': '2025-09-30 22:30:20.507682', 'step': 14819, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:20.554591', 'step': 14819, 'epoch': 2} {'type': 'loss', 'content': 0.06783691048622131, 'timestamp': '2025-09-30 22:30:20.590471', 'step': 14820, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:20.627380', 'step': 14820, 'epoch': 2} {'type': 'loss', 'content': 0.15634387731552124, 'timestamp': '2025-09-30 22:30:20.631505', 'step': 14821, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.676950', 'step': 14821, 'epoch': 2} {'type': 'loss', 'content': 0.09303992986679077, 'timestamp': '2025-09-30 22:30:20.682078', 'step': 14822, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:20.721182', 'step': 14822, 'epoch': 2} {'type': 'loss', 'content': 0.026475707069039345, 'timestamp': '2025-09-30 22:30:20.725866', 'step': 14823, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.770810', 'step': 14823, 'epoch': 2} {'type': 'loss', 'content': 0.10463931411504745, 'timestamp': '2025-09-30 22:30:20.796833', 'step': 14824, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.834567', 'step': 14824, 'epoch': 2} {'type': 'loss', 'content': 0.18361054360866547, 'timestamp': '2025-09-30 22:30:20.840886', 'step': 14825, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:20.901544', 'step': 14825, 'epoch': 2} {'type': 'loss', 'content': 0.05668051540851593, 'timestamp': '2025-09-30 22:30:20.918408', 'step': 14826, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:20.962209', 'step': 14826, 'epoch': 2} {'type': 'loss', 'content': 0.044812969863414764, 'timestamp': '2025-09-30 22:30:20.967670', 'step': 14827, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:21.013027', 'step': 14827, 'epoch': 2} {'type': 'loss', 'content': 0.1286293864250183, 'timestamp': '2025-09-30 22:30:21.039253', 'step': 14828, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.080514', 'step': 14828, 'epoch': 2} {'type': 'loss', 'content': 0.11577674001455307, 'timestamp': '2025-09-30 22:30:21.098756', 'step': 14829, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.144161', 'step': 14829, 'epoch': 2} {'type': 'loss', 'content': 0.11695826053619385, 'timestamp': '2025-09-30 22:30:21.149498', 'step': 14830, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:21.188884', 'step': 14830, 'epoch': 2} {'type': 'loss', 'content': 0.09706638008356094, 'timestamp': '2025-09-30 22:30:21.193144', 'step': 14831, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.240980', 'step': 14831, 'epoch': 2} {'type': 'loss', 'content': 0.09315608441829681, 'timestamp': '2025-09-30 22:30:21.267564', 'step': 14832, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.303327', 'step': 14832, 'epoch': 2} {'type': 'loss', 'content': 0.14240115880966187, 'timestamp': '2025-09-30 22:30:21.311389', 'step': 14833, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:21.345291', 'step': 14833, 'epoch': 2} {'type': 'loss', 'content': 0.1476687341928482, 'timestamp': '2025-09-30 22:30:21.349685', 'step': 14834, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.382995', 'step': 14834, 'epoch': 2} {'type': 'loss', 'content': 0.0802820548415184, 'timestamp': '2025-09-30 22:30:21.403606', 'step': 14835, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:21.446277', 'step': 14835, 'epoch': 2} {'type': 'loss', 'content': 0.16341975331306458, 'timestamp': '2025-09-30 22:30:21.483856', 'step': 14836, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:21.531260', 'step': 14836, 'epoch': 2} {'type': 'loss', 'content': 0.10037074238061905, 'timestamp': '2025-09-30 22:30:21.535625', 'step': 14837, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:21.577943', 'step': 14837, 'epoch': 2} {'type': 'loss', 'content': 0.06091175228357315, 'timestamp': '2025-09-30 22:30:21.584190', 'step': 14838, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.625165', 'step': 14838, 'epoch': 2} {'type': 'loss', 'content': 0.058466967195272446, 'timestamp': '2025-09-30 22:30:21.629157', 'step': 14839, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:21.661944', 'step': 14839, 'epoch': 2} {'type': 'loss', 'content': 0.13644841313362122, 'timestamp': '2025-09-30 22:30:21.690077', 'step': 14840, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.764273', 'step': 14840, 'epoch': 2} {'type': 'loss', 'content': 0.1338604986667633, 'timestamp': '2025-09-30 22:30:21.767658', 'step': 14841, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:21.808160', 'step': 14841, 'epoch': 2} {'type': 'loss', 'content': 0.08757027983665466, 'timestamp': '2025-09-30 22:30:21.824576', 'step': 14842, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.882351', 'step': 14842, 'epoch': 2} {'type': 'loss', 'content': 0.08046936988830566, 'timestamp': '2025-09-30 22:30:21.887832', 'step': 14843, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.918948', 'step': 14843, 'epoch': 2} {'type': 'loss', 'content': 0.09578200429677963, 'timestamp': '2025-09-30 22:30:21.946866', 'step': 14844, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:21.977616', 'step': 14844, 'epoch': 2} {'type': 'loss', 'content': 0.12484423816204071, 'timestamp': '2025-09-30 22:30:21.980963', 'step': 14845, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.023988', 'step': 14845, 'epoch': 2} {'type': 'loss', 'content': 0.09320323169231415, 'timestamp': '2025-09-30 22:30:22.028050', 'step': 14846, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.060502', 'step': 14846, 'epoch': 2} {'type': 'loss', 'content': 0.034054167568683624, 'timestamp': '2025-09-30 22:30:22.077598', 'step': 14847, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.126392', 'step': 14847, 'epoch': 2} {'type': 'loss', 'content': 0.08987627923488617, 'timestamp': '2025-09-30 22:30:22.159789', 'step': 14848, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.214006', 'step': 14848, 'epoch': 2} {'type': 'loss', 'content': 0.132141575217247, 'timestamp': '2025-09-30 22:30:22.229231', 'step': 14849, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.263351', 'step': 14849, 'epoch': 2} {'type': 'loss', 'content': 0.0727895125746727, 'timestamp': '2025-09-30 22:30:22.279338', 'step': 14850, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.312313', 'step': 14850, 'epoch': 2} {'type': 'loss', 'content': 0.14436909556388855, 'timestamp': '2025-09-30 22:30:22.316224', 'step': 14851, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.360236', 'step': 14851, 'epoch': 2} {'type': 'loss', 'content': 0.11373808234930038, 'timestamp': '2025-09-30 22:30:22.386032', 'step': 14852, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:22.421155', 'step': 14852, 'epoch': 2} {'type': 'loss', 'content': 0.09915181994438171, 'timestamp': '2025-09-30 22:30:22.426045', 'step': 14853, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.458900', 'step': 14853, 'epoch': 2} {'type': 'loss', 'content': 0.12216592580080032, 'timestamp': '2025-09-30 22:30:22.463064', 'step': 14854, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.496271', 'step': 14854, 'epoch': 2} {'type': 'loss', 'content': 0.05913157016038895, 'timestamp': '2025-09-30 22:30:22.499776', 'step': 14855, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.535129', 'step': 14855, 'epoch': 2} {'type': 'loss', 'content': 0.08177255839109421, 'timestamp': '2025-09-30 22:30:22.560314', 'step': 14856, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:22.593290', 'step': 14856, 'epoch': 2} {'type': 'loss', 'content': 0.1131070926785469, 'timestamp': '2025-09-30 22:30:22.598139', 'step': 14857, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:22.630320', 'step': 14857, 'epoch': 2} {'type': 'loss', 'content': 0.06067430227994919, 'timestamp': '2025-09-30 22:30:22.645247', 'step': 14858, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.685467', 'step': 14858, 'epoch': 2} {'type': 'loss', 'content': 0.08431416004896164, 'timestamp': '2025-09-30 22:30:22.689972', 'step': 14859, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.721667', 'step': 14859, 'epoch': 2} {'type': 'loss', 'content': 0.07239014655351639, 'timestamp': '2025-09-30 22:30:22.747953', 'step': 14860, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:22.793028', 'step': 14860, 'epoch': 2} {'type': 'loss', 'content': 0.13237124681472778, 'timestamp': '2025-09-30 22:30:22.799144', 'step': 14861, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.831882', 'step': 14861, 'epoch': 2} {'type': 'loss', 'content': 0.10264100134372711, 'timestamp': '2025-09-30 22:30:22.847633', 'step': 14862, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:22.893076', 'step': 14862, 'epoch': 2} {'type': 'loss', 'content': 0.10854166001081467, 'timestamp': '2025-09-30 22:30:22.896835', 'step': 14863, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:22.927533', 'step': 14863, 'epoch': 2} {'type': 'loss', 'content': 0.1127604991197586, 'timestamp': '2025-09-30 22:30:22.965233', 'step': 14864, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:23.009033', 'step': 14864, 'epoch': 2} {'type': 'loss', 'content': 0.07828142493963242, 'timestamp': '2025-09-30 22:30:23.014048', 'step': 14865, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:23.048756', 'step': 14865, 'epoch': 2} {'type': 'loss', 'content': 0.08888296782970428, 'timestamp': '2025-09-30 22:30:23.053833', 'step': 14866, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.086650', 'step': 14866, 'epoch': 2} {'type': 'loss', 'content': 0.08787798136472702, 'timestamp': '2025-09-30 22:30:23.091053', 'step': 14867, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.123059', 'step': 14867, 'epoch': 2} {'type': 'loss', 'content': 0.25192391872406006, 'timestamp': '2025-09-30 22:30:23.149948', 'step': 14868, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:23.183675', 'step': 14868, 'epoch': 2} {'type': 'loss', 'content': 0.17802734673023224, 'timestamp': '2025-09-30 22:30:23.186919', 'step': 14869, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.230668', 'step': 14869, 'epoch': 2} {'type': 'loss', 'content': 0.07933539897203445, 'timestamp': '2025-09-30 22:30:23.246806', 'step': 14870, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.280700', 'step': 14870, 'epoch': 2} {'type': 'loss', 'content': 0.024637840688228607, 'timestamp': '2025-09-30 22:30:23.294721', 'step': 14871, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:23.328755', 'step': 14871, 'epoch': 2} {'type': 'loss', 'content': 0.06455430388450623, 'timestamp': '2025-09-30 22:30:23.355552', 'step': 14872, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:23.398462', 'step': 14872, 'epoch': 2} {'type': 'loss', 'content': 0.11066404730081558, 'timestamp': '2025-09-30 22:30:23.404338', 'step': 14873, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:23.443167', 'step': 14873, 'epoch': 2} {'type': 'loss', 'content': 0.03367818892002106, 'timestamp': '2025-09-30 22:30:23.448545', 'step': 14874, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.485923', 'step': 14874, 'epoch': 2} {'type': 'loss', 'content': 0.03830922767519951, 'timestamp': '2025-09-30 22:30:23.490868', 'step': 14875, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:23.523862', 'step': 14875, 'epoch': 2} {'type': 'loss', 'content': 0.039810895919799805, 'timestamp': '2025-09-30 22:30:23.549772', 'step': 14876, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:23.595992', 'step': 14876, 'epoch': 2} {'type': 'loss', 'content': 0.14631985127925873, 'timestamp': '2025-09-30 22:30:23.601717', 'step': 14877, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:23.646947', 'step': 14877, 'epoch': 2} {'type': 'loss', 'content': 0.07342203706502914, 'timestamp': '2025-09-30 22:30:23.651644', 'step': 14878, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.682793', 'step': 14878, 'epoch': 2} {'type': 'loss', 'content': 0.13713641464710236, 'timestamp': '2025-09-30 22:30:23.688343', 'step': 14879, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:23.723762', 'step': 14879, 'epoch': 2} {'type': 'loss', 'content': 0.10251219570636749, 'timestamp': '2025-09-30 22:30:23.760553', 'step': 14880, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:23.794440', 'step': 14880, 'epoch': 2} {'type': 'loss', 'content': 0.02420332469046116, 'timestamp': '2025-09-30 22:30:23.805726', 'step': 14881, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.839340', 'step': 14881, 'epoch': 2} {'type': 'loss', 'content': 0.13797660171985626, 'timestamp': '2025-09-30 22:30:23.856829', 'step': 14882, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.911708', 'step': 14882, 'epoch': 2} {'type': 'loss', 'content': 0.1972929686307907, 'timestamp': '2025-09-30 22:30:23.930998', 'step': 14883, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:23.975673', 'step': 14883, 'epoch': 2} {'type': 'loss', 'content': 0.08103968948125839, 'timestamp': '2025-09-30 22:30:24.013052', 'step': 14884, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:24.045944', 'step': 14884, 'epoch': 2} {'type': 'loss', 'content': 0.12779371440410614, 'timestamp': '2025-09-30 22:30:24.051792', 'step': 14885, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:24.093935', 'step': 14885, 'epoch': 2} {'type': 'loss', 'content': 0.05632619559764862, 'timestamp': '2025-09-30 22:30:24.101041', 'step': 14886, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:24.134018', 'step': 14886, 'epoch': 2} {'type': 'loss', 'content': 0.0646173506975174, 'timestamp': '2025-09-30 22:30:24.138721', 'step': 14887, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:24.172990', 'step': 14887, 'epoch': 2} {'type': 'loss', 'content': 0.05692053586244583, 'timestamp': '2025-09-30 22:30:24.199813', 'step': 14888, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:24.233651', 'step': 14888, 'epoch': 2} {'type': 'loss', 'content': 0.1386491060256958, 'timestamp': '2025-09-30 22:30:24.239029', 'step': 14889, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:24.270499', 'step': 14889, 'epoch': 2} {'type': 'loss', 'content': 0.07927898317575455, 'timestamp': '2025-09-30 22:30:24.281244', 'step': 14890, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:24.314321', 'step': 14890, 'epoch': 2} {'type': 'loss', 'content': 0.16510707139968872, 'timestamp': '2025-09-30 22:30:24.319445', 'step': 14891, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:24.352239', 'step': 14891, 'epoch': 2} {'type': 'loss', 'content': 0.15681709349155426, 'timestamp': '2025-09-30 22:30:24.377838', 'step': 14892, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:24.415607', 'step': 14892, 'epoch': 2} {'type': 'loss', 'content': 0.0755491778254509, 'timestamp': '2025-09-30 22:30:24.420615', 'step': 14893, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:24.454189', 'step': 14893, 'epoch': 2} {'type': 'loss', 'content': 0.04814311861991882, 'timestamp': '2025-09-30 22:30:24.469080', 'step': 14894, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:24.513761', 'step': 14894, 'epoch': 2} {'type': 'loss', 'content': 0.1152920052409172, 'timestamp': '2025-09-30 22:30:24.517564', 'step': 14895, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:24.551092', 'step': 14895, 'epoch': 2} {'type': 'loss', 'content': 0.09153074026107788, 'timestamp': '2025-09-30 22:30:24.577355', 'step': 14896, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:24.611401', 'step': 14896, 'epoch': 2} {'type': 'loss', 'content': 0.11232330650091171, 'timestamp': '2025-09-30 22:30:24.629583', 'step': 14897, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:24.674230', 'step': 14897, 'epoch': 2} {'type': 'loss', 'content': 0.05705881491303444, 'timestamp': '2025-09-30 22:30:24.679102', 'step': 14898, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:24.710259', 'step': 14898, 'epoch': 2} {'type': 'loss', 'content': 0.0977540835738182, 'timestamp': '2025-09-30 22:30:24.725390', 'step': 14899, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:24.771432', 'step': 14899, 'epoch': 2} {'type': 'loss', 'content': 0.1303933709859848, 'timestamp': '2025-09-30 22:30:24.798165', 'step': 14900, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:24.829844', 'step': 14900, 'epoch': 2} {'type': 'loss', 'content': 0.09937221556901932, 'timestamp': '2025-09-30 22:30:24.835808', 'step': 14901, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:24.878817', 'step': 14901, 'epoch': 2} {'type': 'loss', 'content': 0.09381525218486786, 'timestamp': '2025-09-30 22:30:24.887558', 'step': 14902, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:30:24.920460', 'step': 14902, 'epoch': 2} {'type': 'loss', 'content': 0.13848622143268585, 'timestamp': '2025-09-30 22:30:24.925057', 'step': 14903, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:24.966158', 'step': 14903, 'epoch': 2} {'type': 'loss', 'content': 0.04928318411111832, 'timestamp': '2025-09-30 22:30:24.993818', 'step': 14904, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:25.027398', 'step': 14904, 'epoch': 2} {'type': 'loss', 'content': 0.06864757835865021, 'timestamp': '2025-09-30 22:30:25.046035', 'step': 14905, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:25.090105', 'step': 14905, 'epoch': 2} {'type': 'loss', 'content': 0.09281913191080093, 'timestamp': '2025-09-30 22:30:25.093975', 'step': 14906, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:25.138645', 'step': 14906, 'epoch': 2} {'type': 'loss', 'content': 0.17297060787677765, 'timestamp': '2025-09-30 22:30:25.143980', 'step': 14907, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:25.179068', 'step': 14907, 'epoch': 2} {'type': 'loss', 'content': 0.15075717866420746, 'timestamp': '2025-09-30 22:30:25.204628', 'step': 14908, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:25.238507', 'step': 14908, 'epoch': 2} {'type': 'loss', 'content': 0.10047627985477448, 'timestamp': '2025-09-30 22:30:25.242057', 'step': 14909, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:25.273052', 'step': 14909, 'epoch': 2} {'type': 'loss', 'content': 0.112497977912426, 'timestamp': '2025-09-30 22:30:25.277106', 'step': 14910, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:25.311102', 'step': 14910, 'epoch': 2} {'type': 'loss', 'content': 0.09272249042987823, 'timestamp': '2025-09-30 22:30:25.314379', 'step': 14911, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:25.346328', 'step': 14911, 'epoch': 2} {'type': 'loss', 'content': 0.03866181522607803, 'timestamp': '2025-09-30 22:30:25.372046', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:30:33.053995', 'step': 14912, 'epoch': 2} {'type': 'pplx', 'content': 8701.004348905966, 'timestamp': '2025-09-30 22:30:33.060454', 'step': 14912, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:33.092377', 'step': 14912, 'epoch': 2} {'type': 'loss', 'content': 0.06935402750968933, 'timestamp': '2025-09-30 22:30:33.099988', 'step': 14913, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.146735', 'step': 14913, 'epoch': 2} {'type': 'loss', 'content': 0.03340218588709831, 'timestamp': '2025-09-30 22:30:33.151831', 'step': 14914, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:33.186165', 'step': 14914, 'epoch': 2} {'type': 'loss', 'content': 0.3112967610359192, 'timestamp': '2025-09-30 22:30:33.202751', 'step': 14915, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:33.247673', 'step': 14915, 'epoch': 2} {'type': 'loss', 'content': 0.08198253810405731, 'timestamp': '2025-09-30 22:30:33.273888', 'step': 14916, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.319274', 'step': 14916, 'epoch': 2} {'type': 'loss', 'content': 0.10091134905815125, 'timestamp': '2025-09-30 22:30:33.323338', 'step': 14917, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.367696', 'step': 14917, 'epoch': 2} {'type': 'loss', 'content': 0.08723779022693634, 'timestamp': '2025-09-30 22:30:33.372560', 'step': 14918, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:33.405324', 'step': 14918, 'epoch': 2} {'type': 'loss', 'content': 0.03984636813402176, 'timestamp': '2025-09-30 22:30:33.409346', 'step': 14919, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-09-30 22:30:33.442501', 'step': 14919, 'epoch': 2} {'type': 'loss', 'content': 0.2777590751647949, 'timestamp': '2025-09-30 22:30:33.469688', 'step': 14920, 'epoch': 2} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.501686', 'step': 14920, 'epoch': 3} {'type': 'loss', 'content': 0.05703924596309662, 'timestamp': '2025-09-30 22:30:33.508144', 'step': 14921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.542436', 'step': 14921, 'epoch': 3} {'type': 'loss', 'content': 0.07828375697135925, 'timestamp': '2025-09-30 22:30:33.548640', 'step': 14922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:33.583689', 'step': 14922, 'epoch': 3} {'type': 'loss', 'content': 0.07621604204177856, 'timestamp': '2025-09-30 22:30:33.588173', 'step': 14923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:33.621237', 'step': 14923, 'epoch': 3} {'type': 'loss', 'content': 0.09630531072616577, 'timestamp': '2025-09-30 22:30:33.647464', 'step': 14924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:33.683571', 'step': 14924, 'epoch': 3} {'type': 'loss', 'content': 0.1449355036020279, 'timestamp': '2025-09-30 22:30:33.688700', 'step': 14925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.722565', 'step': 14925, 'epoch': 3} {'type': 'loss', 'content': 0.0517866425216198, 'timestamp': '2025-09-30 22:30:33.727590', 'step': 14926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.770513', 'step': 14926, 'epoch': 3} {'type': 'loss', 'content': 0.16744638979434967, 'timestamp': '2025-09-30 22:30:33.775498', 'step': 14927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.807282', 'step': 14927, 'epoch': 3} {'type': 'loss', 'content': 0.06936833262443542, 'timestamp': '2025-09-30 22:30:33.835817', 'step': 14928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.869187', 'step': 14928, 'epoch': 3} {'type': 'loss', 'content': 0.041644539684057236, 'timestamp': '2025-09-30 22:30:33.874390', 'step': 14929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:33.908753', 'step': 14929, 'epoch': 3} {'type': 'loss', 'content': 0.08194272965192795, 'timestamp': '2025-09-30 22:30:33.914254', 'step': 14930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:33.949337', 'step': 14930, 'epoch': 3} {'type': 'loss', 'content': 0.048496708273887634, 'timestamp': '2025-09-30 22:30:33.954704', 'step': 14931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:33.987184', 'step': 14931, 'epoch': 3} {'type': 'loss', 'content': 0.1363571435213089, 'timestamp': '2025-09-30 22:30:34.014611', 'step': 14932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:34.048925', 'step': 14932, 'epoch': 3} {'type': 'loss', 'content': 0.07024453580379486, 'timestamp': '2025-09-30 22:30:34.053696', 'step': 14933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:34.086578', 'step': 14933, 'epoch': 3} {'type': 'loss', 'content': 0.06904321908950806, 'timestamp': '2025-09-30 22:30:34.094472', 'step': 14934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:34.129661', 'step': 14934, 'epoch': 3} {'type': 'loss', 'content': 0.11426282674074173, 'timestamp': '2025-09-30 22:30:34.144160', 'step': 14935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:34.190636', 'step': 14935, 'epoch': 3} {'type': 'loss', 'content': 0.122590571641922, 'timestamp': '2025-09-30 22:30:34.216034', 'step': 14936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:34.255531', 'step': 14936, 'epoch': 3} {'type': 'loss', 'content': 0.034803424030542374, 'timestamp': '2025-09-30 22:30:34.270827', 'step': 14937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:34.313373', 'step': 14937, 'epoch': 3} {'type': 'loss', 'content': 0.0717361718416214, 'timestamp': '2025-09-30 22:30:34.330665', 'step': 14938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:34.365064', 'step': 14938, 'epoch': 3} {'type': 'loss', 'content': 0.1350594162940979, 'timestamp': '2025-09-30 22:30:34.368919', 'step': 14939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:34.400435', 'step': 14939, 'epoch': 3} {'type': 'loss', 'content': 0.0824538916349411, 'timestamp': '2025-09-30 22:30:34.426746', 'step': 14940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:34.463074', 'step': 14940, 'epoch': 3} {'type': 'loss', 'content': 0.03233931213617325, 'timestamp': '2025-09-30 22:30:34.468980', 'step': 14941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:34.502706', 'step': 14941, 'epoch': 3} {'type': 'loss', 'content': 0.11381417512893677, 'timestamp': '2025-09-30 22:30:34.506126', 'step': 14942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:34.543506', 'step': 14942, 'epoch': 3} {'type': 'loss', 'content': 0.03250136598944664, 'timestamp': '2025-09-30 22:30:34.556335', 'step': 14943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:34.602126', 'step': 14943, 'epoch': 3} {'type': 'loss', 'content': 0.08329418301582336, 'timestamp': '2025-09-30 22:30:34.629639', 'step': 14944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:34.681468', 'step': 14944, 'epoch': 3} {'type': 'loss', 'content': 0.03470384329557419, 'timestamp': '2025-09-30 22:30:34.697553', 'step': 14945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:34.741581', 'step': 14945, 'epoch': 3} {'type': 'loss', 'content': 0.09834915399551392, 'timestamp': '2025-09-30 22:30:34.758041', 'step': 14946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:34.791173', 'step': 14946, 'epoch': 3} {'type': 'loss', 'content': 0.123167023062706, 'timestamp': '2025-09-30 22:30:34.810716', 'step': 14947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:34.844293', 'step': 14947, 'epoch': 3} {'type': 'loss', 'content': 0.04598316550254822, 'timestamp': '2025-09-30 22:30:34.881195', 'step': 14948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:34.917758', 'step': 14948, 'epoch': 3} {'type': 'loss', 'content': 0.06609103083610535, 'timestamp': '2025-09-30 22:30:34.926850', 'step': 14949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:34.961828', 'step': 14949, 'epoch': 3} {'type': 'loss', 'content': 0.04407421872019768, 'timestamp': '2025-09-30 22:30:34.966495', 'step': 14950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.004983', 'step': 14950, 'epoch': 3} {'type': 'loss', 'content': 0.09197753667831421, 'timestamp': '2025-09-30 22:30:35.011917', 'step': 14951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:35.063513', 'step': 14951, 'epoch': 3} {'type': 'loss', 'content': 0.04170626774430275, 'timestamp': '2025-09-30 22:30:35.091866', 'step': 14952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.128498', 'step': 14952, 'epoch': 3} {'type': 'loss', 'content': 0.10318358987569809, 'timestamp': '2025-09-30 22:30:35.134684', 'step': 14953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.168462', 'step': 14953, 'epoch': 3} {'type': 'loss', 'content': 0.08094488829374313, 'timestamp': '2025-09-30 22:30:35.173892', 'step': 14954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:35.210679', 'step': 14954, 'epoch': 3} {'type': 'loss', 'content': 0.12062770128250122, 'timestamp': '2025-09-30 22:30:35.214530', 'step': 14955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:35.258659', 'step': 14955, 'epoch': 3} {'type': 'loss', 'content': 0.030079858377575874, 'timestamp': '2025-09-30 22:30:35.295299', 'step': 14956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.329104', 'step': 14956, 'epoch': 3} {'type': 'loss', 'content': 0.06822947412729263, 'timestamp': '2025-09-30 22:30:35.335048', 'step': 14957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:35.367277', 'step': 14957, 'epoch': 3} {'type': 'loss', 'content': 0.05340474843978882, 'timestamp': '2025-09-30 22:30:35.371989', 'step': 14958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:35.403777', 'step': 14958, 'epoch': 3} {'type': 'loss', 'content': 0.10160794854164124, 'timestamp': '2025-09-30 22:30:35.408482', 'step': 14959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.440558', 'step': 14959, 'epoch': 3} {'type': 'loss', 'content': 0.057939980179071426, 'timestamp': '2025-09-30 22:30:35.466104', 'step': 14960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:35.499568', 'step': 14960, 'epoch': 3} {'type': 'loss', 'content': 0.04715350642800331, 'timestamp': '2025-09-30 22:30:35.519688', 'step': 14961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.564658', 'step': 14961, 'epoch': 3} {'type': 'loss', 'content': 0.09218920022249222, 'timestamp': '2025-09-30 22:30:35.569337', 'step': 14962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.601405', 'step': 14962, 'epoch': 3} {'type': 'loss', 'content': 0.088067427277565, 'timestamp': '2025-09-30 22:30:35.604813', 'step': 14963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.638062', 'step': 14963, 'epoch': 3} {'type': 'loss', 'content': 0.07676530629396439, 'timestamp': '2025-09-30 22:30:35.673484', 'step': 14964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.705985', 'step': 14964, 'epoch': 3} {'type': 'loss', 'content': 0.10459985584020615, 'timestamp': '2025-09-30 22:30:35.719822', 'step': 14965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:35.753160', 'step': 14965, 'epoch': 3} {'type': 'loss', 'content': 0.058726340532302856, 'timestamp': '2025-09-30 22:30:35.761433', 'step': 14966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:35.803051', 'step': 14966, 'epoch': 3} {'type': 'loss', 'content': 0.09073071926832199, 'timestamp': '2025-09-30 22:30:35.813594', 'step': 14967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:35.861910', 'step': 14967, 'epoch': 3} {'type': 'loss', 'content': 0.08204149454832077, 'timestamp': '2025-09-30 22:30:35.902447', 'step': 14968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:35.942871', 'step': 14968, 'epoch': 3} {'type': 'loss', 'content': 0.10916321724653244, 'timestamp': '2025-09-30 22:30:35.956488', 'step': 14969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:36.000219', 'step': 14969, 'epoch': 3} {'type': 'loss', 'content': 0.04939385503530502, 'timestamp': '2025-09-30 22:30:36.016151', 'step': 14970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:36.052163', 'step': 14970, 'epoch': 3} {'type': 'loss', 'content': 0.07760713994503021, 'timestamp': '2025-09-30 22:30:36.061636', 'step': 14971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:36.094606', 'step': 14971, 'epoch': 3} {'type': 'loss', 'content': 0.09093105047941208, 'timestamp': '2025-09-30 22:30:36.122768', 'step': 14972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:36.162295', 'step': 14972, 'epoch': 3} {'type': 'loss', 'content': 0.058057356625795364, 'timestamp': '2025-09-30 22:30:36.178165', 'step': 14973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:36.214395', 'step': 14973, 'epoch': 3} {'type': 'loss', 'content': 0.05360066890716553, 'timestamp': '2025-09-30 22:30:36.232822', 'step': 14974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:36.277111', 'step': 14974, 'epoch': 3} {'type': 'loss', 'content': 0.08279240131378174, 'timestamp': '2025-09-30 22:30:36.283923', 'step': 14975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:36.322006', 'step': 14975, 'epoch': 3} {'type': 'loss', 'content': 0.05605708062648773, 'timestamp': '2025-09-30 22:30:36.358043', 'step': 14976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:36.403611', 'step': 14976, 'epoch': 3} {'type': 'loss', 'content': 0.0940876379609108, 'timestamp': '2025-09-30 22:30:36.407205', 'step': 14977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:36.449626', 'step': 14977, 'epoch': 3} {'type': 'loss', 'content': 0.09037631750106812, 'timestamp': '2025-09-30 22:30:36.464420', 'step': 14978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:36.498274', 'step': 14978, 'epoch': 3} {'type': 'loss', 'content': 0.09873254597187042, 'timestamp': '2025-09-30 22:30:36.512803', 'step': 14979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:36.558726', 'step': 14979, 'epoch': 3} {'type': 'loss', 'content': 0.06951382756233215, 'timestamp': '2025-09-30 22:30:36.591255', 'step': 14980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:36.630891', 'step': 14980, 'epoch': 3} {'type': 'loss', 'content': 0.07310166954994202, 'timestamp': '2025-09-30 22:30:36.642941', 'step': 14981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:36.676730', 'step': 14981, 'epoch': 3} {'type': 'loss', 'content': 0.058381982147693634, 'timestamp': '2025-09-30 22:30:36.682882', 'step': 14982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:36.724937', 'step': 14982, 'epoch': 3} {'type': 'loss', 'content': 0.18764814734458923, 'timestamp': '2025-09-30 22:30:36.742092', 'step': 14983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:36.797481', 'step': 14983, 'epoch': 3} {'type': 'loss', 'content': 0.04740997403860092, 'timestamp': '2025-09-30 22:30:36.844693', 'step': 14984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:36.891854', 'step': 14984, 'epoch': 3} {'type': 'loss', 'content': 0.017746074125170708, 'timestamp': '2025-09-30 22:30:36.905391', 'step': 14985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:36.937636', 'step': 14985, 'epoch': 3} {'type': 'loss', 'content': 0.08980719745159149, 'timestamp': '2025-09-30 22:30:36.944135', 'step': 14986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:36.985265', 'step': 14986, 'epoch': 3} {'type': 'loss', 'content': 0.04074592888355255, 'timestamp': '2025-09-30 22:30:36.996760', 'step': 14987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.048750', 'step': 14987, 'epoch': 3} {'type': 'loss', 'content': 0.08196204155683517, 'timestamp': '2025-09-30 22:30:37.089441', 'step': 14988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:37.123465', 'step': 14988, 'epoch': 3} {'type': 'loss', 'content': 0.10314492881298065, 'timestamp': '2025-09-30 22:30:37.128283', 'step': 14989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.170666', 'step': 14989, 'epoch': 3} {'type': 'loss', 'content': 0.09756459295749664, 'timestamp': '2025-09-30 22:30:37.187267', 'step': 14990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.229522', 'step': 14990, 'epoch': 3} {'type': 'loss', 'content': 0.0735018253326416, 'timestamp': '2025-09-30 22:30:37.233553', 'step': 14991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.289048', 'step': 14991, 'epoch': 3} {'type': 'loss', 'content': 0.0679205060005188, 'timestamp': '2025-09-30 22:30:37.322131', 'step': 14992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:37.370268', 'step': 14992, 'epoch': 3} {'type': 'loss', 'content': 0.09923131763935089, 'timestamp': '2025-09-30 22:30:37.382604', 'step': 14993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.422459', 'step': 14993, 'epoch': 3} {'type': 'loss', 'content': 0.0451631173491478, 'timestamp': '2025-09-30 22:30:37.427153', 'step': 14994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:37.471604', 'step': 14994, 'epoch': 3} {'type': 'loss', 'content': 0.07801003754138947, 'timestamp': '2025-09-30 22:30:37.486966', 'step': 14995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:37.538551', 'step': 14995, 'epoch': 3} {'type': 'loss', 'content': 0.06479986757040024, 'timestamp': '2025-09-30 22:30:37.575811', 'step': 14996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:37.609157', 'step': 14996, 'epoch': 3} {'type': 'loss', 'content': 0.059813614934682846, 'timestamp': '2025-09-30 22:30:37.621619', 'step': 14997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.661267', 'step': 14997, 'epoch': 3} {'type': 'loss', 'content': 0.06779082864522934, 'timestamp': '2025-09-30 22:30:37.667466', 'step': 14998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.701466', 'step': 14998, 'epoch': 3} {'type': 'loss', 'content': 0.05080264061689377, 'timestamp': '2025-09-30 22:30:37.707842', 'step': 14999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:37.739668', 'step': 14999, 'epoch': 3} {'type': 'loss', 'content': 0.0764751136302948, 'timestamp': '2025-09-30 22:30:37.767214', 'step': 15000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15000', 'timestamp': '2025-09-30 22:30:43.369696', 'step': 15000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:43.421334', 'step': 15000, 'epoch': 3} {'type': 'loss', 'content': 0.0777624174952507, 'timestamp': '2025-09-30 22:30:43.426358', 'step': 15001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:43.468389', 'step': 15001, 'epoch': 3} {'type': 'loss', 'content': 0.09552911669015884, 'timestamp': '2025-09-30 22:30:43.474645', 'step': 15002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:43.515363', 'step': 15002, 'epoch': 3} {'type': 'loss', 'content': 0.1416371613740921, 'timestamp': '2025-09-30 22:30:43.531667', 'step': 15003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.565363', 'step': 15003, 'epoch': 3} {'type': 'loss', 'content': 0.08100763708353043, 'timestamp': '2025-09-30 22:30:43.590329', 'step': 15004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:43.623692', 'step': 15004, 'epoch': 3} {'type': 'loss', 'content': 0.05011896416544914, 'timestamp': '2025-09-30 22:30:43.628002', 'step': 15005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:43.659787', 'step': 15005, 'epoch': 3} {'type': 'loss', 'content': 0.03652160242199898, 'timestamp': '2025-09-30 22:30:43.663948', 'step': 15006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.704265', 'step': 15006, 'epoch': 3} {'type': 'loss', 'content': 0.07374239712953568, 'timestamp': '2025-09-30 22:30:43.707768', 'step': 15007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:43.750158', 'step': 15007, 'epoch': 3} {'type': 'loss', 'content': 0.050068266689777374, 'timestamp': '2025-09-30 22:30:43.776415', 'step': 15008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.808870', 'step': 15008, 'epoch': 3} {'type': 'loss', 'content': 0.06798863410949707, 'timestamp': '2025-09-30 22:30:43.815581', 'step': 15009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.847964', 'step': 15009, 'epoch': 3} {'type': 'loss', 'content': 0.1049896776676178, 'timestamp': '2025-09-30 22:30:43.852828', 'step': 15010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.887649', 'step': 15010, 'epoch': 3} {'type': 'loss', 'content': 0.07504411041736603, 'timestamp': '2025-09-30 22:30:43.893044', 'step': 15011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.924900', 'step': 15011, 'epoch': 3} {'type': 'loss', 'content': 0.06434280425310135, 'timestamp': '2025-09-30 22:30:43.953037', 'step': 15012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:43.984805', 'step': 15012, 'epoch': 3} {'type': 'loss', 'content': 0.10439637303352356, 'timestamp': '2025-09-30 22:30:43.991578', 'step': 15013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:44.025405', 'step': 15013, 'epoch': 3} {'type': 'loss', 'content': 0.07639981061220169, 'timestamp': '2025-09-30 22:30:44.030093', 'step': 15014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:44.074594', 'step': 15014, 'epoch': 3} {'type': 'loss', 'content': 0.046227503567934036, 'timestamp': '2025-09-30 22:30:44.079224', 'step': 15015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:44.111940', 'step': 15015, 'epoch': 3} {'type': 'loss', 'content': 0.10219583660364151, 'timestamp': '2025-09-30 22:30:44.138107', 'step': 15016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.181491', 'step': 15016, 'epoch': 3} {'type': 'loss', 'content': 0.08443868905305862, 'timestamp': '2025-09-30 22:30:44.202840', 'step': 15017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.236631', 'step': 15017, 'epoch': 3} {'type': 'loss', 'content': 0.08382466435432434, 'timestamp': '2025-09-30 22:30:44.242233', 'step': 15018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:44.283421', 'step': 15018, 'epoch': 3} {'type': 'loss', 'content': 0.08101525157690048, 'timestamp': '2025-09-30 22:30:44.287873', 'step': 15019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.330524', 'step': 15019, 'epoch': 3} {'type': 'loss', 'content': 0.09009597450494766, 'timestamp': '2025-09-30 22:30:44.364436', 'step': 15020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:44.396627', 'step': 15020, 'epoch': 3} {'type': 'loss', 'content': 0.19723162055015564, 'timestamp': '2025-09-30 22:30:44.409891', 'step': 15021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:44.454084', 'step': 15021, 'epoch': 3} {'type': 'loss', 'content': 0.035764094442129135, 'timestamp': '2025-09-30 22:30:44.459456', 'step': 15022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:44.508065', 'step': 15022, 'epoch': 3} {'type': 'loss', 'content': 0.07881945371627808, 'timestamp': '2025-09-30 22:30:44.522834', 'step': 15023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.569146', 'step': 15023, 'epoch': 3} {'type': 'loss', 'content': 0.13418836891651154, 'timestamp': '2025-09-30 22:30:44.597638', 'step': 15024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.630898', 'step': 15024, 'epoch': 3} {'type': 'loss', 'content': 0.11202982068061829, 'timestamp': '2025-09-30 22:30:44.637154', 'step': 15025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.670884', 'step': 15025, 'epoch': 3} {'type': 'loss', 'content': 0.0660327598452568, 'timestamp': '2025-09-30 22:30:44.683333', 'step': 15026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:44.715727', 'step': 15026, 'epoch': 3} {'type': 'loss', 'content': 0.07824420183897018, 'timestamp': '2025-09-30 22:30:44.722252', 'step': 15027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:44.755766', 'step': 15027, 'epoch': 3} {'type': 'loss', 'content': 0.13052316009998322, 'timestamp': '2025-09-30 22:30:44.794647', 'step': 15028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:44.827245', 'step': 15028, 'epoch': 3} {'type': 'loss', 'content': 0.09309624135494232, 'timestamp': '2025-09-30 22:30:44.839452', 'step': 15029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.873323', 'step': 15029, 'epoch': 3} {'type': 'loss', 'content': 0.0953480526804924, 'timestamp': '2025-09-30 22:30:44.877358', 'step': 15030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:44.911076', 'step': 15030, 'epoch': 3} {'type': 'loss', 'content': 0.08237098157405853, 'timestamp': '2025-09-30 22:30:44.916794', 'step': 15031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:44.959042', 'step': 15031, 'epoch': 3} {'type': 'loss', 'content': 0.06849456578493118, 'timestamp': '2025-09-30 22:30:44.994886', 'step': 15032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:45.028156', 'step': 15032, 'epoch': 3} {'type': 'loss', 'content': 0.11267212778329849, 'timestamp': '2025-09-30 22:30:45.033673', 'step': 15033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:45.073281', 'step': 15033, 'epoch': 3} {'type': 'loss', 'content': 0.15016523003578186, 'timestamp': '2025-09-30 22:30:45.078535', 'step': 15034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:45.111533', 'step': 15034, 'epoch': 3} {'type': 'loss', 'content': 0.0783587247133255, 'timestamp': '2025-09-30 22:30:45.115038', 'step': 15035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:45.149388', 'step': 15035, 'epoch': 3} {'type': 'loss', 'content': 0.17253230512142181, 'timestamp': '2025-09-30 22:30:45.174706', 'step': 15036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:45.208796', 'step': 15036, 'epoch': 3} {'type': 'loss', 'content': 0.09745094925165176, 'timestamp': '2025-09-30 22:30:45.220455', 'step': 15037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:45.262913', 'step': 15037, 'epoch': 3} {'type': 'loss', 'content': 0.1568554937839508, 'timestamp': '2025-09-30 22:30:45.278567', 'step': 15038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:45.324806', 'step': 15038, 'epoch': 3} {'type': 'loss', 'content': 0.06437543034553528, 'timestamp': '2025-09-30 22:30:45.329273', 'step': 15039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:45.366654', 'step': 15039, 'epoch': 3} {'type': 'loss', 'content': 0.16539710760116577, 'timestamp': '2025-09-30 22:30:45.400746', 'step': 15040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:45.436054', 'step': 15040, 'epoch': 3} {'type': 'loss', 'content': 0.12550127506256104, 'timestamp': '2025-09-30 22:30:45.438697', 'step': 15041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:45.471798', 'step': 15041, 'epoch': 3} {'type': 'loss', 'content': 0.09200763702392578, 'timestamp': '2025-09-30 22:30:45.489894', 'step': 15042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:45.534266', 'step': 15042, 'epoch': 3} {'type': 'loss', 'content': 0.13481014966964722, 'timestamp': '2025-09-30 22:30:45.538710', 'step': 15043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:45.572779', 'step': 15043, 'epoch': 3} {'type': 'loss', 'content': 0.08898055553436279, 'timestamp': '2025-09-30 22:30:45.597303', 'step': 15044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:45.636217', 'step': 15044, 'epoch': 3} {'type': 'loss', 'content': 0.009969369508326054, 'timestamp': '2025-09-30 22:30:45.651438', 'step': 15045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:45.688523', 'step': 15045, 'epoch': 3} {'type': 'loss', 'content': 0.029259078204631805, 'timestamp': '2025-09-30 22:30:45.692038', 'step': 15046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:45.739542', 'step': 15046, 'epoch': 3} {'type': 'loss', 'content': 0.09208032488822937, 'timestamp': '2025-09-30 22:30:45.743980', 'step': 15047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:45.776331', 'step': 15047, 'epoch': 3} {'type': 'loss', 'content': 0.20948293805122375, 'timestamp': '2025-09-30 22:30:45.802042', 'step': 15048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:45.837750', 'step': 15048, 'epoch': 3} {'type': 'loss', 'content': 0.14108902215957642, 'timestamp': '2025-09-30 22:30:45.841000', 'step': 15049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:45.873716', 'step': 15049, 'epoch': 3} {'type': 'loss', 'content': 0.11158043146133423, 'timestamp': '2025-09-30 22:30:45.886652', 'step': 15050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:45.919647', 'step': 15050, 'epoch': 3} {'type': 'loss', 'content': 0.0847487673163414, 'timestamp': '2025-09-30 22:30:45.936005', 'step': 15051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:45.968869', 'step': 15051, 'epoch': 3} {'type': 'loss', 'content': 0.05293162539601326, 'timestamp': '2025-09-30 22:30:45.995859', 'step': 15052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:46.047955', 'step': 15052, 'epoch': 3} {'type': 'loss', 'content': 0.15313182771205902, 'timestamp': '2025-09-30 22:30:46.063954', 'step': 15053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.104342', 'step': 15053, 'epoch': 3} {'type': 'loss', 'content': 0.1402837485074997, 'timestamp': '2025-09-30 22:30:46.119530', 'step': 15054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.171325', 'step': 15054, 'epoch': 3} {'type': 'loss', 'content': 0.032769594341516495, 'timestamp': '2025-09-30 22:30:46.175308', 'step': 15055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:46.211322', 'step': 15055, 'epoch': 3} {'type': 'loss', 'content': 0.07838406413793564, 'timestamp': '2025-09-30 22:30:46.238176', 'step': 15056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:46.273513', 'step': 15056, 'epoch': 3} {'type': 'loss', 'content': 0.07536350190639496, 'timestamp': '2025-09-30 22:30:46.281003', 'step': 15057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.329113', 'step': 15057, 'epoch': 3} {'type': 'loss', 'content': 0.09962299466133118, 'timestamp': '2025-09-30 22:30:46.333734', 'step': 15058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.365990', 'step': 15058, 'epoch': 3} {'type': 'loss', 'content': 0.08994089812040329, 'timestamp': '2025-09-30 22:30:46.369618', 'step': 15059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:46.418748', 'step': 15059, 'epoch': 3} {'type': 'loss', 'content': 0.08033669739961624, 'timestamp': '2025-09-30 22:30:46.444833', 'step': 15060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.476779', 'step': 15060, 'epoch': 3} {'type': 'loss', 'content': 0.16118846833705902, 'timestamp': '2025-09-30 22:30:46.482121', 'step': 15061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:46.518936', 'step': 15061, 'epoch': 3} {'type': 'loss', 'content': 0.08051417022943497, 'timestamp': '2025-09-30 22:30:46.523407', 'step': 15062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.556102', 'step': 15062, 'epoch': 3} {'type': 'loss', 'content': 0.12158757448196411, 'timestamp': '2025-09-30 22:30:46.561229', 'step': 15063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:46.601423', 'step': 15063, 'epoch': 3} {'type': 'loss', 'content': 0.09601278603076935, 'timestamp': '2025-09-30 22:30:46.641133', 'step': 15064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.673227', 'step': 15064, 'epoch': 3} {'type': 'loss', 'content': 0.10405781120061874, 'timestamp': '2025-09-30 22:30:46.677623', 'step': 15065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:46.717567', 'step': 15065, 'epoch': 3} {'type': 'loss', 'content': 0.16031889617443085, 'timestamp': '2025-09-30 22:30:46.721988', 'step': 15066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:46.761905', 'step': 15066, 'epoch': 3} {'type': 'loss', 'content': 0.0987556204199791, 'timestamp': '2025-09-30 22:30:46.767708', 'step': 15067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:46.802932', 'step': 15067, 'epoch': 3} {'type': 'loss', 'content': 0.07482971996068954, 'timestamp': '2025-09-30 22:30:46.830629', 'step': 15068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:46.878514', 'step': 15068, 'epoch': 3} {'type': 'loss', 'content': 0.028300819918513298, 'timestamp': '2025-09-30 22:30:46.886464', 'step': 15069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:46.926548', 'step': 15069, 'epoch': 3} {'type': 'loss', 'content': 0.11025556176900864, 'timestamp': '2025-09-30 22:30:46.939931', 'step': 15070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:46.984103', 'step': 15070, 'epoch': 3} {'type': 'loss', 'content': 0.07497432082891464, 'timestamp': '2025-09-30 22:30:46.987948', 'step': 15071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:47.020328', 'step': 15071, 'epoch': 3} {'type': 'loss', 'content': 0.07045487314462662, 'timestamp': '2025-09-30 22:30:47.045456', 'step': 15072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:47.077674', 'step': 15072, 'epoch': 3} {'type': 'loss', 'content': 0.0796440839767456, 'timestamp': '2025-09-30 22:30:47.082053', 'step': 15073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:47.114702', 'step': 15073, 'epoch': 3} {'type': 'loss', 'content': 0.13906683027744293, 'timestamp': '2025-09-30 22:30:47.119335', 'step': 15074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:47.153310', 'step': 15074, 'epoch': 3} {'type': 'loss', 'content': 0.22534844279289246, 'timestamp': '2025-09-30 22:30:47.157952', 'step': 15075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:47.199788', 'step': 15075, 'epoch': 3} {'type': 'loss', 'content': 0.13390399515628815, 'timestamp': '2025-09-30 22:30:47.235572', 'step': 15076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:47.277859', 'step': 15076, 'epoch': 3} {'type': 'loss', 'content': 0.04841700196266174, 'timestamp': '2025-09-30 22:30:47.282851', 'step': 15077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:47.325390', 'step': 15077, 'epoch': 3} {'type': 'loss', 'content': 0.12572136521339417, 'timestamp': '2025-09-30 22:30:47.328505', 'step': 15078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:47.374061', 'step': 15078, 'epoch': 3} {'type': 'loss', 'content': 0.0832531526684761, 'timestamp': '2025-09-30 22:30:47.379441', 'step': 15079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:47.414078', 'step': 15079, 'epoch': 3} {'type': 'loss', 'content': 0.06694205105304718, 'timestamp': '2025-09-30 22:30:47.449111', 'step': 15080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:47.499435', 'step': 15080, 'epoch': 3} {'type': 'loss', 'content': 0.1191040650010109, 'timestamp': '2025-09-30 22:30:47.517466', 'step': 15081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:47.551545', 'step': 15081, 'epoch': 3} {'type': 'loss', 'content': 0.11862590909004211, 'timestamp': '2025-09-30 22:30:47.557329', 'step': 15082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:47.601367', 'step': 15082, 'epoch': 3} {'type': 'loss', 'content': 0.06601487100124359, 'timestamp': '2025-09-30 22:30:47.607200', 'step': 15083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:47.639671', 'step': 15083, 'epoch': 3} {'type': 'loss', 'content': 0.06628397852182388, 'timestamp': '2025-09-30 22:30:47.677528', 'step': 15084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:47.712860', 'step': 15084, 'epoch': 3} {'type': 'loss', 'content': 0.15332812070846558, 'timestamp': '2025-09-30 22:30:47.718453', 'step': 15085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:47.752282', 'step': 15085, 'epoch': 3} {'type': 'loss', 'content': 0.06084920093417168, 'timestamp': '2025-09-30 22:30:47.758280', 'step': 15086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:47.792519', 'step': 15086, 'epoch': 3} {'type': 'loss', 'content': 0.0697287991642952, 'timestamp': '2025-09-30 22:30:47.796298', 'step': 15087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:47.838230', 'step': 15087, 'epoch': 3} {'type': 'loss', 'content': 0.0693119615316391, 'timestamp': '2025-09-30 22:30:47.878491', 'step': 15088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:47.910556', 'step': 15088, 'epoch': 3} {'type': 'loss', 'content': 0.05330052226781845, 'timestamp': '2025-09-30 22:30:47.915201', 'step': 15089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:47.946951', 'step': 15089, 'epoch': 3} {'type': 'loss', 'content': 0.09394479542970657, 'timestamp': '2025-09-30 22:30:47.951625', 'step': 15090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:47.985199', 'step': 15090, 'epoch': 3} {'type': 'loss', 'content': 0.08524720370769501, 'timestamp': '2025-09-30 22:30:47.990110', 'step': 15091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:48.022534', 'step': 15091, 'epoch': 3} {'type': 'loss', 'content': 0.15471899509429932, 'timestamp': '2025-09-30 22:30:48.047478', 'step': 15092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:48.078856', 'step': 15092, 'epoch': 3} {'type': 'loss', 'content': 0.16056372225284576, 'timestamp': '2025-09-30 22:30:48.093706', 'step': 15093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:48.139250', 'step': 15093, 'epoch': 3} {'type': 'loss', 'content': 0.08633091300725937, 'timestamp': '2025-09-30 22:30:48.143857', 'step': 15094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.180647', 'step': 15094, 'epoch': 3} {'type': 'loss', 'content': 0.11809296160936356, 'timestamp': '2025-09-30 22:30:48.184263', 'step': 15095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.218174', 'step': 15095, 'epoch': 3} {'type': 'loss', 'content': 0.13569803535938263, 'timestamp': '2025-09-30 22:30:48.244944', 'step': 15096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:48.288102', 'step': 15096, 'epoch': 3} {'type': 'loss', 'content': 0.11439681798219681, 'timestamp': '2025-09-30 22:30:48.292867', 'step': 15097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:48.342179', 'step': 15097, 'epoch': 3} {'type': 'loss', 'content': 0.05029710754752159, 'timestamp': '2025-09-30 22:30:48.346550', 'step': 15098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.381365', 'step': 15098, 'epoch': 3} {'type': 'loss', 'content': 0.1517934799194336, 'timestamp': '2025-09-30 22:30:48.385840', 'step': 15099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:48.417826', 'step': 15099, 'epoch': 3} {'type': 'loss', 'content': 0.06275730580091476, 'timestamp': '2025-09-30 22:30:48.444802', 'step': 15100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:48.479443', 'step': 15100, 'epoch': 3} {'type': 'loss', 'content': 0.14409273862838745, 'timestamp': '2025-09-30 22:30:48.484558', 'step': 15101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.519061', 'step': 15101, 'epoch': 3} {'type': 'loss', 'content': 0.03658493608236313, 'timestamp': '2025-09-30 22:30:48.534307', 'step': 15102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.567890', 'step': 15102, 'epoch': 3} {'type': 'loss', 'content': 0.15743982791900635, 'timestamp': '2025-09-30 22:30:48.571825', 'step': 15103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.615050', 'step': 15103, 'epoch': 3} {'type': 'loss', 'content': 0.07990875095129013, 'timestamp': '2025-09-30 22:30:48.641444', 'step': 15104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:48.686962', 'step': 15104, 'epoch': 3} {'type': 'loss', 'content': 0.10569417476654053, 'timestamp': '2025-09-30 22:30:48.701248', 'step': 15105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:48.733737', 'step': 15105, 'epoch': 3} {'type': 'loss', 'content': 0.045315541326999664, 'timestamp': '2025-09-30 22:30:48.748509', 'step': 15106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:48.781621', 'step': 15106, 'epoch': 3} {'type': 'loss', 'content': 0.10742691904306412, 'timestamp': '2025-09-30 22:30:48.785371', 'step': 15107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:48.818875', 'step': 15107, 'epoch': 3} {'type': 'loss', 'content': 0.11362873017787933, 'timestamp': '2025-09-30 22:30:48.844483', 'step': 15108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:48.892387', 'step': 15108, 'epoch': 3} {'type': 'loss', 'content': 0.1150466650724411, 'timestamp': '2025-09-30 22:30:48.910591', 'step': 15109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:48.943135', 'step': 15109, 'epoch': 3} {'type': 'loss', 'content': 0.03514060750603676, 'timestamp': '2025-09-30 22:30:48.947949', 'step': 15110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:48.980290', 'step': 15110, 'epoch': 3} {'type': 'loss', 'content': 0.024919837713241577, 'timestamp': '2025-09-30 22:30:48.984903', 'step': 15111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:49.017941', 'step': 15111, 'epoch': 3} {'type': 'loss', 'content': 0.07892782241106033, 'timestamp': '2025-09-30 22:30:49.044199', 'step': 15112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.079324', 'step': 15112, 'epoch': 3} {'type': 'loss', 'content': 0.11578970402479172, 'timestamp': '2025-09-30 22:30:49.084251', 'step': 15113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:49.127184', 'step': 15113, 'epoch': 3} {'type': 'loss', 'content': 0.11152517795562744, 'timestamp': '2025-09-30 22:30:49.140953', 'step': 15114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:49.184068', 'step': 15114, 'epoch': 3} {'type': 'loss', 'content': 0.13032659888267517, 'timestamp': '2025-09-30 22:30:49.189294', 'step': 15115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:49.221005', 'step': 15115, 'epoch': 3} {'type': 'loss', 'content': 0.03871211037039757, 'timestamp': '2025-09-30 22:30:49.254395', 'step': 15116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:49.308345', 'step': 15116, 'epoch': 3} {'type': 'loss', 'content': 0.10555193573236465, 'timestamp': '2025-09-30 22:30:49.313571', 'step': 15117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.346599', 'step': 15117, 'epoch': 3} {'type': 'loss', 'content': 0.07339733839035034, 'timestamp': '2025-09-30 22:30:49.351346', 'step': 15118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:49.383817', 'step': 15118, 'epoch': 3} {'type': 'loss', 'content': 0.06196148321032524, 'timestamp': '2025-09-30 22:30:49.390642', 'step': 15119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:49.426102', 'step': 15119, 'epoch': 3} {'type': 'loss', 'content': 0.1157965213060379, 'timestamp': '2025-09-30 22:30:49.459709', 'step': 15120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:49.497605', 'step': 15120, 'epoch': 3} {'type': 'loss', 'content': 0.19061587750911713, 'timestamp': '2025-09-30 22:30:49.510958', 'step': 15121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:49.545128', 'step': 15121, 'epoch': 3} {'type': 'loss', 'content': 0.06116221472620964, 'timestamp': '2025-09-30 22:30:49.551196', 'step': 15122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:49.584431', 'step': 15122, 'epoch': 3} {'type': 'loss', 'content': 0.08531647175550461, 'timestamp': '2025-09-30 22:30:49.589768', 'step': 15123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:49.630090', 'step': 15123, 'epoch': 3} {'type': 'loss', 'content': 0.07412489503622055, 'timestamp': '2025-09-30 22:30:49.668382', 'step': 15124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.703246', 'step': 15124, 'epoch': 3} {'type': 'loss', 'content': 0.13276880979537964, 'timestamp': '2025-09-30 22:30:49.716549', 'step': 15125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.766117', 'step': 15125, 'epoch': 3} {'type': 'loss', 'content': 0.07023098319768906, 'timestamp': '2025-09-30 22:30:49.772283', 'step': 15126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:49.805270', 'step': 15126, 'epoch': 3} {'type': 'loss', 'content': 0.11977144330739975, 'timestamp': '2025-09-30 22:30:49.810309', 'step': 15127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.852260', 'step': 15127, 'epoch': 3} {'type': 'loss', 'content': 0.11928202211856842, 'timestamp': '2025-09-30 22:30:49.877731', 'step': 15128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.918012', 'step': 15128, 'epoch': 3} {'type': 'loss', 'content': 0.08219724893569946, 'timestamp': '2025-09-30 22:30:49.921341', 'step': 15129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.961513', 'step': 15129, 'epoch': 3} {'type': 'loss', 'content': 0.21080903708934784, 'timestamp': '2025-09-30 22:30:49.967873', 'step': 15130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:49.999681', 'step': 15130, 'epoch': 3} {'type': 'loss', 'content': 0.06770693510770798, 'timestamp': '2025-09-30 22:30:50.015243', 'step': 15131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.059079', 'step': 15131, 'epoch': 3} {'type': 'loss', 'content': 0.11940780282020569, 'timestamp': '2025-09-30 22:30:50.084877', 'step': 15132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.118935', 'step': 15132, 'epoch': 3} {'type': 'loss', 'content': 0.11707805842161179, 'timestamp': '2025-09-30 22:30:50.126710', 'step': 15133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.159425', 'step': 15133, 'epoch': 3} {'type': 'loss', 'content': 0.15842032432556152, 'timestamp': '2025-09-30 22:30:50.164460', 'step': 15134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.196314', 'step': 15134, 'epoch': 3} {'type': 'loss', 'content': 0.08671630918979645, 'timestamp': '2025-09-30 22:30:50.201690', 'step': 15135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.237333', 'step': 15135, 'epoch': 3} {'type': 'loss', 'content': 0.1364928036928177, 'timestamp': '2025-09-30 22:30:50.263280', 'step': 15136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.306393', 'step': 15136, 'epoch': 3} {'type': 'loss', 'content': 0.13567033410072327, 'timestamp': '2025-09-30 22:30:50.310159', 'step': 15137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.356588', 'step': 15137, 'epoch': 3} {'type': 'loss', 'content': 0.08808640390634537, 'timestamp': '2025-09-30 22:30:50.364686', 'step': 15138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.401823', 'step': 15138, 'epoch': 3} {'type': 'loss', 'content': 0.15340860188007355, 'timestamp': '2025-09-30 22:30:50.405976', 'step': 15139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.440765', 'step': 15139, 'epoch': 3} {'type': 'loss', 'content': 0.13025052845478058, 'timestamp': '2025-09-30 22:30:50.468326', 'step': 15140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.500552', 'step': 15140, 'epoch': 3} {'type': 'loss', 'content': 0.1599939614534378, 'timestamp': '2025-09-30 22:30:50.505347', 'step': 15141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.548674', 'step': 15141, 'epoch': 3} {'type': 'loss', 'content': 0.08733893930912018, 'timestamp': '2025-09-30 22:30:50.561643', 'step': 15142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.593637', 'step': 15142, 'epoch': 3} {'type': 'loss', 'content': 0.11407560855150223, 'timestamp': '2025-09-30 22:30:50.603574', 'step': 15143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.639282', 'step': 15143, 'epoch': 3} {'type': 'loss', 'content': 0.03855165094137192, 'timestamp': '2025-09-30 22:30:50.666148', 'step': 15144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.699080', 'step': 15144, 'epoch': 3} {'type': 'loss', 'content': 0.08150872588157654, 'timestamp': '2025-09-30 22:30:50.707480', 'step': 15145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.739105', 'step': 15145, 'epoch': 3} {'type': 'loss', 'content': 0.10298506915569305, 'timestamp': '2025-09-30 22:30:50.758299', 'step': 15146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:50.792886', 'step': 15146, 'epoch': 3} {'type': 'loss', 'content': 0.11421316862106323, 'timestamp': '2025-09-30 22:30:50.813821', 'step': 15147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:50.854886', 'step': 15147, 'epoch': 3} {'type': 'loss', 'content': 0.042731598019599915, 'timestamp': '2025-09-30 22:30:50.890540', 'step': 15148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:50.940490', 'step': 15148, 'epoch': 3} {'type': 'loss', 'content': 0.12881577014923096, 'timestamp': '2025-09-30 22:30:50.957191', 'step': 15149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:51.001166', 'step': 15149, 'epoch': 3} {'type': 'loss', 'content': 0.05884108692407608, 'timestamp': '2025-09-30 22:30:51.015549', 'step': 15150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.059492', 'step': 15150, 'epoch': 3} {'type': 'loss', 'content': 0.06852564215660095, 'timestamp': '2025-09-30 22:30:51.076039', 'step': 15151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.109079', 'step': 15151, 'epoch': 3} {'type': 'loss', 'content': 0.1366364061832428, 'timestamp': '2025-09-30 22:30:51.134525', 'step': 15152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:51.165809', 'step': 15152, 'epoch': 3} {'type': 'loss', 'content': 0.09801258146762848, 'timestamp': '2025-09-30 22:30:51.184115', 'step': 15153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:51.229850', 'step': 15153, 'epoch': 3} {'type': 'loss', 'content': 0.07319546490907669, 'timestamp': '2025-09-30 22:30:51.247861', 'step': 15154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:51.280701', 'step': 15154, 'epoch': 3} {'type': 'loss', 'content': 0.14905209839344025, 'timestamp': '2025-09-30 22:30:51.299284', 'step': 15155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:51.346939', 'step': 15155, 'epoch': 3} {'type': 'loss', 'content': 0.0319049209356308, 'timestamp': '2025-09-30 22:30:51.382153', 'step': 15156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.426020', 'step': 15156, 'epoch': 3} {'type': 'loss', 'content': 0.1381235122680664, 'timestamp': '2025-09-30 22:30:51.430717', 'step': 15157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.463337', 'step': 15157, 'epoch': 3} {'type': 'loss', 'content': 0.136921226978302, 'timestamp': '2025-09-30 22:30:51.466277', 'step': 15158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.504618', 'step': 15158, 'epoch': 3} {'type': 'loss', 'content': 0.12632609903812408, 'timestamp': '2025-09-30 22:30:51.507764', 'step': 15159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:51.539381', 'step': 15159, 'epoch': 3} {'type': 'loss', 'content': 0.03304733335971832, 'timestamp': '2025-09-30 22:30:51.564042', 'step': 15160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.596371', 'step': 15160, 'epoch': 3} {'type': 'loss', 'content': 0.13045385479927063, 'timestamp': '2025-09-30 22:30:51.611919', 'step': 15161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:51.656102', 'step': 15161, 'epoch': 3} {'type': 'loss', 'content': 0.029376965016126633, 'timestamp': '2025-09-30 22:30:51.661883', 'step': 15162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.698662', 'step': 15162, 'epoch': 3} {'type': 'loss', 'content': 0.23672029376029968, 'timestamp': '2025-09-30 22:30:51.703395', 'step': 15163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:51.735728', 'step': 15163, 'epoch': 3} {'type': 'loss', 'content': 0.09346047788858414, 'timestamp': '2025-09-30 22:30:51.762823', 'step': 15164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:51.794561', 'step': 15164, 'epoch': 3} {'type': 'loss', 'content': 0.13956938683986664, 'timestamp': '2025-09-30 22:30:51.797512', 'step': 15165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:51.833545', 'step': 15165, 'epoch': 3} {'type': 'loss', 'content': 0.16070163249969482, 'timestamp': '2025-09-30 22:30:51.847641', 'step': 15166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:51.892384', 'step': 15166, 'epoch': 3} {'type': 'loss', 'content': 0.0470317006111145, 'timestamp': '2025-09-30 22:30:51.897506', 'step': 15167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:51.942907', 'step': 15167, 'epoch': 3} {'type': 'loss', 'content': 0.07467403262853622, 'timestamp': '2025-09-30 22:30:51.969030', 'step': 15168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:52.001472', 'step': 15168, 'epoch': 3} {'type': 'loss', 'content': 0.1187589019536972, 'timestamp': '2025-09-30 22:30:52.015596', 'step': 15169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:52.059510', 'step': 15169, 'epoch': 3} {'type': 'loss', 'content': 0.08880224823951721, 'timestamp': '2025-09-30 22:30:52.074356', 'step': 15170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.105287', 'step': 15170, 'epoch': 3} {'type': 'loss', 'content': 0.07565360516309738, 'timestamp': '2025-09-30 22:30:52.119006', 'step': 15171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:52.150959', 'step': 15171, 'epoch': 3} {'type': 'loss', 'content': 0.08073961734771729, 'timestamp': '2025-09-30 22:30:52.177982', 'step': 15172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:52.209089', 'step': 15172, 'epoch': 3} {'type': 'loss', 'content': 0.1679016351699829, 'timestamp': '2025-09-30 22:30:52.212637', 'step': 15173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.245526', 'step': 15173, 'epoch': 3} {'type': 'loss', 'content': 0.06423585116863251, 'timestamp': '2025-09-30 22:30:52.262103', 'step': 15174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:52.295399', 'step': 15174, 'epoch': 3} {'type': 'loss', 'content': 0.07735943049192429, 'timestamp': '2025-09-30 22:30:52.300345', 'step': 15175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.334381', 'step': 15175, 'epoch': 3} {'type': 'loss', 'content': 0.031979840248823166, 'timestamp': '2025-09-30 22:30:52.361442', 'step': 15176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:52.406471', 'step': 15176, 'epoch': 3} {'type': 'loss', 'content': 0.12270824611186981, 'timestamp': '2025-09-30 22:30:52.410388', 'step': 15177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.443286', 'step': 15177, 'epoch': 3} {'type': 'loss', 'content': 0.10504798591136932, 'timestamp': '2025-09-30 22:30:52.447770', 'step': 15178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:52.478934', 'step': 15178, 'epoch': 3} {'type': 'loss', 'content': 0.10397939383983612, 'timestamp': '2025-09-30 22:30:52.485081', 'step': 15179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.516709', 'step': 15179, 'epoch': 3} {'type': 'loss', 'content': 0.1379096508026123, 'timestamp': '2025-09-30 22:30:52.543735', 'step': 15180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.576158', 'step': 15180, 'epoch': 3} {'type': 'loss', 'content': 0.09290819615125656, 'timestamp': '2025-09-30 22:30:52.581409', 'step': 15181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:52.624509', 'step': 15181, 'epoch': 3} {'type': 'loss', 'content': 0.06060066074132919, 'timestamp': '2025-09-30 22:30:52.630342', 'step': 15182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.662810', 'step': 15182, 'epoch': 3} {'type': 'loss', 'content': 0.09341081231832504, 'timestamp': '2025-09-30 22:30:52.668168', 'step': 15183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:30:52.701774', 'step': 15183, 'epoch': 3} {'type': 'loss', 'content': 0.05888354778289795, 'timestamp': '2025-09-30 22:30:52.729837', 'step': 15184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.763280', 'step': 15184, 'epoch': 3} {'type': 'loss', 'content': 0.09764396399259567, 'timestamp': '2025-09-30 22:30:52.768146', 'step': 15185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:52.813327', 'step': 15185, 'epoch': 3} {'type': 'loss', 'content': 0.05649332329630852, 'timestamp': '2025-09-30 22:30:52.816935', 'step': 15186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.848364', 'step': 15186, 'epoch': 3} {'type': 'loss', 'content': 0.0710526779294014, 'timestamp': '2025-09-30 22:30:52.854677', 'step': 15187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:52.888820', 'step': 15187, 'epoch': 3} {'type': 'loss', 'content': 0.08105367422103882, 'timestamp': '2025-09-30 22:30:52.915460', 'step': 15188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:52.948381', 'step': 15188, 'epoch': 3} {'type': 'loss', 'content': 0.07858028262853622, 'timestamp': '2025-09-30 22:30:52.964469', 'step': 15189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:52.999033', 'step': 15189, 'epoch': 3} {'type': 'loss', 'content': 0.06685841828584671, 'timestamp': '2025-09-30 22:30:53.003877', 'step': 15190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:53.046420', 'step': 15190, 'epoch': 3} {'type': 'loss', 'content': 0.17208874225616455, 'timestamp': '2025-09-30 22:30:53.052338', 'step': 15191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.085505', 'step': 15191, 'epoch': 3} {'type': 'loss', 'content': 0.09094294905662537, 'timestamp': '2025-09-30 22:30:53.111043', 'step': 15192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:53.142677', 'step': 15192, 'epoch': 3} {'type': 'loss', 'content': 0.07064763456583023, 'timestamp': '2025-09-30 22:30:53.158459', 'step': 15193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.191911', 'step': 15193, 'epoch': 3} {'type': 'loss', 'content': 0.05237232893705368, 'timestamp': '2025-09-30 22:30:53.197395', 'step': 15194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:53.230562', 'step': 15194, 'epoch': 3} {'type': 'loss', 'content': 0.06815006583929062, 'timestamp': '2025-09-30 22:30:53.245746', 'step': 15195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:53.281888', 'step': 15195, 'epoch': 3} {'type': 'loss', 'content': 0.0743970051407814, 'timestamp': '2025-09-30 22:30:53.314963', 'step': 15196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.347451', 'step': 15196, 'epoch': 3} {'type': 'loss', 'content': 0.07769404351711273, 'timestamp': '2025-09-30 22:30:53.352072', 'step': 15197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:53.384543', 'step': 15197, 'epoch': 3} {'type': 'loss', 'content': 0.09263907372951508, 'timestamp': '2025-09-30 22:30:53.388565', 'step': 15198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:53.433914', 'step': 15198, 'epoch': 3} {'type': 'loss', 'content': 0.08674866706132889, 'timestamp': '2025-09-30 22:30:53.437924', 'step': 15199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.469868', 'step': 15199, 'epoch': 3} {'type': 'loss', 'content': 0.11021063476800919, 'timestamp': '2025-09-30 22:30:53.508388', 'step': 15200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:53.553514', 'step': 15200, 'epoch': 3} {'type': 'loss', 'content': 0.03459862247109413, 'timestamp': '2025-09-30 22:30:53.566994', 'step': 15201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.599171', 'step': 15201, 'epoch': 3} {'type': 'loss', 'content': 0.06412278860807419, 'timestamp': '2025-09-30 22:30:53.604588', 'step': 15202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:53.637684', 'step': 15202, 'epoch': 3} {'type': 'loss', 'content': 0.05806891620159149, 'timestamp': '2025-09-30 22:30:53.655703', 'step': 15203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:53.687737', 'step': 15203, 'epoch': 3} {'type': 'loss', 'content': 0.031258322298526764, 'timestamp': '2025-09-30 22:30:53.713375', 'step': 15204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:53.744872', 'step': 15204, 'epoch': 3} {'type': 'loss', 'content': 0.08600947260856628, 'timestamp': '2025-09-30 22:30:53.748833', 'step': 15205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:53.782019', 'step': 15205, 'epoch': 3} {'type': 'loss', 'content': 0.18153594434261322, 'timestamp': '2025-09-30 22:30:53.787931', 'step': 15206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.824032', 'step': 15206, 'epoch': 3} {'type': 'loss', 'content': 0.20984677970409393, 'timestamp': '2025-09-30 22:30:53.837949', 'step': 15207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:53.871404', 'step': 15207, 'epoch': 3} {'type': 'loss', 'content': 0.15128488838672638, 'timestamp': '2025-09-30 22:30:53.909224', 'step': 15208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:53.941084', 'step': 15208, 'epoch': 3} {'type': 'loss', 'content': 0.12477636337280273, 'timestamp': '2025-09-30 22:30:53.945710', 'step': 15209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:53.976841', 'step': 15209, 'epoch': 3} {'type': 'loss', 'content': 0.06651406735181808, 'timestamp': '2025-09-30 22:30:53.994753', 'step': 15210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:54.026614', 'step': 15210, 'epoch': 3} {'type': 'loss', 'content': 0.07891527563333511, 'timestamp': '2025-09-30 22:30:54.030707', 'step': 15211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:54.063884', 'step': 15211, 'epoch': 3} {'type': 'loss', 'content': 0.1451665461063385, 'timestamp': '2025-09-30 22:30:54.090825', 'step': 15212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:54.125705', 'step': 15212, 'epoch': 3} {'type': 'loss', 'content': 0.17168469727039337, 'timestamp': '2025-09-30 22:30:54.140054', 'step': 15213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.183324', 'step': 15213, 'epoch': 3} {'type': 'loss', 'content': 0.11915882676839828, 'timestamp': '2025-09-30 22:30:54.189818', 'step': 15214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:54.242853', 'step': 15214, 'epoch': 3} {'type': 'loss', 'content': 0.12300782650709152, 'timestamp': '2025-09-30 22:30:54.257537', 'step': 15215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.292556', 'step': 15215, 'epoch': 3} {'type': 'loss', 'content': 0.06021857261657715, 'timestamp': '2025-09-30 22:30:54.318480', 'step': 15216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:54.367241', 'step': 15216, 'epoch': 3} {'type': 'loss', 'content': 0.061244502663612366, 'timestamp': '2025-09-30 22:30:54.371482', 'step': 15217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.411811', 'step': 15217, 'epoch': 3} {'type': 'loss', 'content': 0.07790666818618774, 'timestamp': '2025-09-30 22:30:54.415291', 'step': 15218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:54.458673', 'step': 15218, 'epoch': 3} {'type': 'loss', 'content': 0.08615517616271973, 'timestamp': '2025-09-30 22:30:54.465472', 'step': 15219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.500826', 'step': 15219, 'epoch': 3} {'type': 'loss', 'content': 0.10142235457897186, 'timestamp': '2025-09-30 22:30:54.528142', 'step': 15220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.560185', 'step': 15220, 'epoch': 3} {'type': 'loss', 'content': 0.1069692075252533, 'timestamp': '2025-09-30 22:30:54.565539', 'step': 15221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:54.602159', 'step': 15221, 'epoch': 3} {'type': 'loss', 'content': 0.12249702215194702, 'timestamp': '2025-09-30 22:30:54.606981', 'step': 15222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:54.639807', 'step': 15222, 'epoch': 3} {'type': 'loss', 'content': 0.08711075782775879, 'timestamp': '2025-09-30 22:30:54.645517', 'step': 15223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.693327', 'step': 15223, 'epoch': 3} {'type': 'loss', 'content': 0.12672866880893707, 'timestamp': '2025-09-30 22:30:54.722042', 'step': 15224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-30 22:30:54.762018', 'step': 15224, 'epoch': 3} {'type': 'loss', 'content': 0.04798821732401848, 'timestamp': '2025-09-30 22:30:54.777897', 'step': 15225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:54.811428', 'step': 15225, 'epoch': 3} {'type': 'loss', 'content': 0.13978925347328186, 'timestamp': '2025-09-30 22:30:54.815480', 'step': 15226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.854398', 'step': 15226, 'epoch': 3} {'type': 'loss', 'content': 0.06262918561697006, 'timestamp': '2025-09-30 22:30:54.859658', 'step': 15227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:54.898075', 'step': 15227, 'epoch': 3} {'type': 'loss', 'content': 0.07197190821170807, 'timestamp': '2025-09-30 22:30:54.922758', 'step': 15228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:54.954168', 'step': 15228, 'epoch': 3} {'type': 'loss', 'content': 0.0834270566701889, 'timestamp': '2025-09-30 22:30:54.959598', 'step': 15229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:54.999054', 'step': 15229, 'epoch': 3} {'type': 'loss', 'content': 0.0951581671833992, 'timestamp': '2025-09-30 22:30:55.004785', 'step': 15230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.038110', 'step': 15230, 'epoch': 3} {'type': 'loss', 'content': 0.14852330088615417, 'timestamp': '2025-09-30 22:30:55.052865', 'step': 15231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:55.096775', 'step': 15231, 'epoch': 3} {'type': 'loss', 'content': 0.06883760541677475, 'timestamp': '2025-09-30 22:30:55.122353', 'step': 15232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.164806', 'step': 15232, 'epoch': 3} {'type': 'loss', 'content': 0.03605658560991287, 'timestamp': '2025-09-30 22:30:55.180148', 'step': 15233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.223958', 'step': 15233, 'epoch': 3} {'type': 'loss', 'content': 0.07703647762537003, 'timestamp': '2025-09-30 22:30:55.238243', 'step': 15234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:55.279350', 'step': 15234, 'epoch': 3} {'type': 'loss', 'content': 0.057162921875715256, 'timestamp': '2025-09-30 22:30:55.283585', 'step': 15235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:55.316138', 'step': 15235, 'epoch': 3} {'type': 'loss', 'content': 0.06308247894048691, 'timestamp': '2025-09-30 22:30:55.352162', 'step': 15236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:55.383051', 'step': 15236, 'epoch': 3} {'type': 'loss', 'content': 0.07614214718341827, 'timestamp': '2025-09-30 22:30:55.387540', 'step': 15237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:55.432176', 'step': 15237, 'epoch': 3} {'type': 'loss', 'content': 0.08502732217311859, 'timestamp': '2025-09-30 22:30:55.435436', 'step': 15238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.469020', 'step': 15238, 'epoch': 3} {'type': 'loss', 'content': 0.08370069414377213, 'timestamp': '2025-09-30 22:30:55.479468', 'step': 15239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.511926', 'step': 15239, 'epoch': 3} {'type': 'loss', 'content': 0.09000872820615768, 'timestamp': '2025-09-30 22:30:55.549922', 'step': 15240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:55.582667', 'step': 15240, 'epoch': 3} {'type': 'loss', 'content': 0.16619732975959778, 'timestamp': '2025-09-30 22:30:55.587881', 'step': 15241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.632959', 'step': 15241, 'epoch': 3} {'type': 'loss', 'content': 0.0966271162033081, 'timestamp': '2025-09-30 22:30:55.647438', 'step': 15242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:55.694745', 'step': 15242, 'epoch': 3} {'type': 'loss', 'content': 0.04355819523334503, 'timestamp': '2025-09-30 22:30:55.699671', 'step': 15243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.733852', 'step': 15243, 'epoch': 3} {'type': 'loss', 'content': 0.05844586342573166, 'timestamp': '2025-09-30 22:30:55.759643', 'step': 15244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:55.793614', 'step': 15244, 'epoch': 3} {'type': 'loss', 'content': 0.07350819557905197, 'timestamp': '2025-09-30 22:30:55.800055', 'step': 15245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:55.840808', 'step': 15245, 'epoch': 3} {'type': 'loss', 'content': 0.1820838302373886, 'timestamp': '2025-09-30 22:30:55.844247', 'step': 15246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:55.888428', 'step': 15246, 'epoch': 3} {'type': 'loss', 'content': 0.10218534618616104, 'timestamp': '2025-09-30 22:30:55.894502', 'step': 15247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:55.927812', 'step': 15247, 'epoch': 3} {'type': 'loss', 'content': 0.13426180183887482, 'timestamp': '2025-09-30 22:30:55.954627', 'step': 15248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:55.996301', 'step': 15248, 'epoch': 3} {'type': 'loss', 'content': 0.09553179889917374, 'timestamp': '2025-09-30 22:30:56.001003', 'step': 15249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:56.033530', 'step': 15249, 'epoch': 3} {'type': 'loss', 'content': 0.046484608203172684, 'timestamp': '2025-09-30 22:30:56.039243', 'step': 15250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:56.082697', 'step': 15250, 'epoch': 3} {'type': 'loss', 'content': 0.1532825380563736, 'timestamp': '2025-09-30 22:30:56.097184', 'step': 15251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:56.128772', 'step': 15251, 'epoch': 3} {'type': 'loss', 'content': 0.1061837375164032, 'timestamp': '2025-09-30 22:30:56.154177', 'step': 15252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:56.186675', 'step': 15252, 'epoch': 3} {'type': 'loss', 'content': 0.05365791544318199, 'timestamp': '2025-09-30 22:30:56.192727', 'step': 15253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:56.227664', 'step': 15253, 'epoch': 3} {'type': 'loss', 'content': 0.07781880348920822, 'timestamp': '2025-09-30 22:30:56.231875', 'step': 15254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:56.274659', 'step': 15254, 'epoch': 3} {'type': 'loss', 'content': 0.05481622740626335, 'timestamp': '2025-09-30 22:30:56.280357', 'step': 15255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:56.316467', 'step': 15255, 'epoch': 3} {'type': 'loss', 'content': 0.08994075655937195, 'timestamp': '2025-09-30 22:30:56.342544', 'step': 15256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:56.374990', 'step': 15256, 'epoch': 3} {'type': 'loss', 'content': 0.12746968865394592, 'timestamp': '2025-09-30 22:30:56.379704', 'step': 15257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:56.425748', 'step': 15257, 'epoch': 3} {'type': 'loss', 'content': 0.05301667004823685, 'timestamp': '2025-09-30 22:30:56.431382', 'step': 15258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:56.479721', 'step': 15258, 'epoch': 3} {'type': 'loss', 'content': 0.09727878123521805, 'timestamp': '2025-09-30 22:30:56.484409', 'step': 15259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:56.516642', 'step': 15259, 'epoch': 3} {'type': 'loss', 'content': 0.06649957597255707, 'timestamp': '2025-09-30 22:30:56.542885', 'step': 15260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:56.575957', 'step': 15260, 'epoch': 3} {'type': 'loss', 'content': 0.08913762122392654, 'timestamp': '2025-09-30 22:30:56.581684', 'step': 15261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:56.615112', 'step': 15261, 'epoch': 3} {'type': 'loss', 'content': 0.07920989394187927, 'timestamp': '2025-09-30 22:30:56.619084', 'step': 15262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:56.650630', 'step': 15262, 'epoch': 3} {'type': 'loss', 'content': 0.0666145384311676, 'timestamp': '2025-09-30 22:30:56.664371', 'step': 15263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:56.698265', 'step': 15263, 'epoch': 3} {'type': 'loss', 'content': 0.0601888969540596, 'timestamp': '2025-09-30 22:30:56.723594', 'step': 15264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:56.755613', 'step': 15264, 'epoch': 3} {'type': 'loss', 'content': 0.1226271390914917, 'timestamp': '2025-09-30 22:30:56.761337', 'step': 15265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:56.795249', 'step': 15265, 'epoch': 3} {'type': 'loss', 'content': 0.08135221898555756, 'timestamp': '2025-09-30 22:30:56.800731', 'step': 15266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:56.833265', 'step': 15266, 'epoch': 3} {'type': 'loss', 'content': 0.1130024865269661, 'timestamp': '2025-09-30 22:30:56.837824', 'step': 15267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:56.869760', 'step': 15267, 'epoch': 3} {'type': 'loss', 'content': 0.03551220893859863, 'timestamp': '2025-09-30 22:30:56.908268', 'step': 15268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:56.940193', 'step': 15268, 'epoch': 3} {'type': 'loss', 'content': 0.08992050588130951, 'timestamp': '2025-09-30 22:30:56.944605', 'step': 15269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:56.978263', 'step': 15269, 'epoch': 3} {'type': 'loss', 'content': 0.04676739498972893, 'timestamp': '2025-09-30 22:30:56.983002', 'step': 15270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:57.013996', 'step': 15270, 'epoch': 3} {'type': 'loss', 'content': 0.0808660015463829, 'timestamp': '2025-09-30 22:30:57.017456', 'step': 15271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:57.069157', 'step': 15271, 'epoch': 3} {'type': 'loss', 'content': 0.07333593815565109, 'timestamp': '2025-09-30 22:30:57.093496', 'step': 15272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:57.126557', 'step': 15272, 'epoch': 3} {'type': 'loss', 'content': 0.03404131531715393, 'timestamp': '2025-09-30 22:30:57.129938', 'step': 15273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:57.178831', 'step': 15273, 'epoch': 3} {'type': 'loss', 'content': 0.040086425840854645, 'timestamp': '2025-09-30 22:30:57.196409', 'step': 15274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:57.229276', 'step': 15274, 'epoch': 3} {'type': 'loss', 'content': 0.03469264507293701, 'timestamp': '2025-09-30 22:30:57.234264', 'step': 15275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:57.269445', 'step': 15275, 'epoch': 3} {'type': 'loss', 'content': 0.20196819305419922, 'timestamp': '2025-09-30 22:30:57.296919', 'step': 15276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:57.342911', 'step': 15276, 'epoch': 3} {'type': 'loss', 'content': 0.12432121485471725, 'timestamp': '2025-09-30 22:30:57.348409', 'step': 15277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:57.382070', 'step': 15277, 'epoch': 3} {'type': 'loss', 'content': 0.043942440301179886, 'timestamp': '2025-09-30 22:30:57.400606', 'step': 15278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:57.434172', 'step': 15278, 'epoch': 3} {'type': 'loss', 'content': 0.04582735151052475, 'timestamp': '2025-09-30 22:30:57.441084', 'step': 15279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:57.475733', 'step': 15279, 'epoch': 3} {'type': 'loss', 'content': 0.1533861756324768, 'timestamp': '2025-09-30 22:30:57.514064', 'step': 15280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:57.545461', 'step': 15280, 'epoch': 3} {'type': 'loss', 'content': 0.031102562323212624, 'timestamp': '2025-09-30 22:30:57.562354', 'step': 15281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:57.593925', 'step': 15281, 'epoch': 3} {'type': 'loss', 'content': 0.0539330430328846, 'timestamp': '2025-09-30 22:30:57.599760', 'step': 15282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:57.631184', 'step': 15282, 'epoch': 3} {'type': 'loss', 'content': 0.0291338711977005, 'timestamp': '2025-09-30 22:30:57.645725', 'step': 15283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:57.700103', 'step': 15283, 'epoch': 3} {'type': 'loss', 'content': 0.04773140326142311, 'timestamp': '2025-09-30 22:30:57.725940', 'step': 15284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:57.757029', 'step': 15284, 'epoch': 3} {'type': 'loss', 'content': 0.14297018945217133, 'timestamp': '2025-09-30 22:30:57.773057', 'step': 15285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:57.833026', 'step': 15285, 'epoch': 3} {'type': 'loss', 'content': 0.04717540740966797, 'timestamp': '2025-09-30 22:30:57.837348', 'step': 15286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:57.874158', 'step': 15286, 'epoch': 3} {'type': 'loss', 'content': 0.21258868277072906, 'timestamp': '2025-09-30 22:30:57.878539', 'step': 15287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:57.910426', 'step': 15287, 'epoch': 3} {'type': 'loss', 'content': 0.06598224490880966, 'timestamp': '2025-09-30 22:30:57.942552', 'step': 15288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:57.988700', 'step': 15288, 'epoch': 3} {'type': 'loss', 'content': 0.10097180306911469, 'timestamp': '2025-09-30 22:30:57.994011', 'step': 15289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.029767', 'step': 15289, 'epoch': 3} {'type': 'loss', 'content': 0.027655554935336113, 'timestamp': '2025-09-30 22:30:58.034227', 'step': 15290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.080519', 'step': 15290, 'epoch': 3} {'type': 'loss', 'content': 0.06598860025405884, 'timestamp': '2025-09-30 22:30:58.085640', 'step': 15291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.117717', 'step': 15291, 'epoch': 3} {'type': 'loss', 'content': 0.0927312970161438, 'timestamp': '2025-09-30 22:30:58.143811', 'step': 15292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:30:58.183010', 'step': 15292, 'epoch': 3} {'type': 'loss', 'content': 0.06267382204532623, 'timestamp': '2025-09-30 22:30:58.188121', 'step': 15293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.232055', 'step': 15293, 'epoch': 3} {'type': 'loss', 'content': 0.09815620630979538, 'timestamp': '2025-09-30 22:30:58.240349', 'step': 15294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.274263', 'step': 15294, 'epoch': 3} {'type': 'loss', 'content': 0.10734333842992783, 'timestamp': '2025-09-30 22:30:58.278332', 'step': 15295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.313049', 'step': 15295, 'epoch': 3} {'type': 'loss', 'content': 0.06349372118711472, 'timestamp': '2025-09-30 22:30:58.337784', 'step': 15296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.376863', 'step': 15296, 'epoch': 3} {'type': 'loss', 'content': 0.09945904463529587, 'timestamp': '2025-09-30 22:30:58.380702', 'step': 15297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.434484', 'step': 15297, 'epoch': 3} {'type': 'loss', 'content': 0.0578659325838089, 'timestamp': '2025-09-30 22:30:58.439844', 'step': 15298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.478322', 'step': 15298, 'epoch': 3} {'type': 'loss', 'content': 0.0973205640912056, 'timestamp': '2025-09-30 22:30:58.482851', 'step': 15299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:58.528582', 'step': 15299, 'epoch': 3} {'type': 'loss', 'content': 0.14883950352668762, 'timestamp': '2025-09-30 22:30:58.565975', 'step': 15300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.607710', 'step': 15300, 'epoch': 3} {'type': 'loss', 'content': 0.13325746357440948, 'timestamp': '2025-09-30 22:30:58.614127', 'step': 15301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.646763', 'step': 15301, 'epoch': 3} {'type': 'loss', 'content': 0.16621333360671997, 'timestamp': '2025-09-30 22:30:58.663325', 'step': 15302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:58.696082', 'step': 15302, 'epoch': 3} {'type': 'loss', 'content': 0.052053529769182205, 'timestamp': '2025-09-30 22:30:58.712622', 'step': 15303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.755617', 'step': 15303, 'epoch': 3} {'type': 'loss', 'content': 0.0798996314406395, 'timestamp': '2025-09-30 22:30:58.793852', 'step': 15304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:58.839569', 'step': 15304, 'epoch': 3} {'type': 'loss', 'content': 0.07957015186548233, 'timestamp': '2025-09-30 22:30:58.856338', 'step': 15305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:58.888298', 'step': 15305, 'epoch': 3} {'type': 'loss', 'content': 0.03223773092031479, 'timestamp': '2025-09-30 22:30:58.904887', 'step': 15306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:58.938261', 'step': 15306, 'epoch': 3} {'type': 'loss', 'content': 0.0413544625043869, 'timestamp': '2025-09-30 22:30:58.950728', 'step': 15307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:30:58.984267', 'step': 15307, 'epoch': 3} {'type': 'loss', 'content': 0.09872204065322876, 'timestamp': '2025-09-30 22:30:59.019392', 'step': 15308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:59.052233', 'step': 15308, 'epoch': 3} {'type': 'loss', 'content': 0.08094557374715805, 'timestamp': '2025-09-30 22:30:59.057097', 'step': 15309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:59.089001', 'step': 15309, 'epoch': 3} {'type': 'loss', 'content': 0.06342583149671555, 'timestamp': '2025-09-30 22:30:59.092059', 'step': 15310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:59.124531', 'step': 15310, 'epoch': 3} {'type': 'loss', 'content': 0.0841158926486969, 'timestamp': '2025-09-30 22:30:59.129584', 'step': 15311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:59.174087', 'step': 15311, 'epoch': 3} {'type': 'loss', 'content': 0.08826853334903717, 'timestamp': '2025-09-30 22:30:59.202327', 'step': 15312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:59.249450', 'step': 15312, 'epoch': 3} {'type': 'loss', 'content': 0.09268125146627426, 'timestamp': '2025-09-30 22:30:59.262538', 'step': 15313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:59.303545', 'step': 15313, 'epoch': 3} {'type': 'loss', 'content': 0.04429065063595772, 'timestamp': '2025-09-30 22:30:59.308783', 'step': 15314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:59.351421', 'step': 15314, 'epoch': 3} {'type': 'loss', 'content': 0.15778692066669464, 'timestamp': '2025-09-30 22:30:59.355396', 'step': 15315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:59.387875', 'step': 15315, 'epoch': 3} {'type': 'loss', 'content': 0.08592193573713303, 'timestamp': '2025-09-30 22:30:59.425675', 'step': 15316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:59.469960', 'step': 15316, 'epoch': 3} {'type': 'loss', 'content': 0.07900986820459366, 'timestamp': '2025-09-30 22:30:59.486719', 'step': 15317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:59.528063', 'step': 15317, 'epoch': 3} {'type': 'loss', 'content': 0.15230660140514374, 'timestamp': '2025-09-30 22:30:59.534054', 'step': 15318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:59.579354', 'step': 15318, 'epoch': 3} {'type': 'loss', 'content': 0.04219604656100273, 'timestamp': '2025-09-30 22:30:59.586170', 'step': 15319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:30:59.620003', 'step': 15319, 'epoch': 3} {'type': 'loss', 'content': 0.11542031913995743, 'timestamp': '2025-09-30 22:30:59.654255', 'step': 15320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:30:59.699078', 'step': 15320, 'epoch': 3} {'type': 'loss', 'content': 0.08333505690097809, 'timestamp': '2025-09-30 22:30:59.703642', 'step': 15321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:30:59.738634', 'step': 15321, 'epoch': 3} {'type': 'loss', 'content': 0.09699946641921997, 'timestamp': '2025-09-30 22:30:59.742963', 'step': 15322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:59.779191', 'step': 15322, 'epoch': 3} {'type': 'loss', 'content': 0.09697331488132477, 'timestamp': '2025-09-30 22:30:59.801050', 'step': 15323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:59.857564', 'step': 15323, 'epoch': 3} {'type': 'loss', 'content': 0.02979975752532482, 'timestamp': '2025-09-30 22:30:59.895017', 'step': 15324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:30:59.929819', 'step': 15324, 'epoch': 3} {'type': 'loss', 'content': 0.1373537927865982, 'timestamp': '2025-09-30 22:30:59.948915', 'step': 15325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:30:59.986362', 'step': 15325, 'epoch': 3} {'type': 'loss', 'content': 0.06947845220565796, 'timestamp': '2025-09-30 22:30:59.989957', 'step': 15326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:00.027198', 'step': 15326, 'epoch': 3} {'type': 'loss', 'content': 0.04342896118760109, 'timestamp': '2025-09-30 22:31:00.033613', 'step': 15327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:00.068276', 'step': 15327, 'epoch': 3} {'type': 'loss', 'content': 0.07398009300231934, 'timestamp': '2025-09-30 22:31:00.095555', 'step': 15328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:00.128644', 'step': 15328, 'epoch': 3} {'type': 'loss', 'content': 0.09261894971132278, 'timestamp': '2025-09-30 22:31:00.138836', 'step': 15329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:00.192508', 'step': 15329, 'epoch': 3} {'type': 'loss', 'content': 0.0744473785161972, 'timestamp': '2025-09-30 22:31:00.205729', 'step': 15330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:00.237484', 'step': 15330, 'epoch': 3} {'type': 'loss', 'content': 0.14334502816200256, 'timestamp': '2025-09-30 22:31:00.244901', 'step': 15331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:00.277103', 'step': 15331, 'epoch': 3} {'type': 'loss', 'content': 0.10714039206504822, 'timestamp': '2025-09-30 22:31:00.312807', 'step': 15332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:00.348694', 'step': 15332, 'epoch': 3} {'type': 'loss', 'content': 0.079057976603508, 'timestamp': '2025-09-30 22:31:00.357356', 'step': 15333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:00.413313', 'step': 15333, 'epoch': 3} {'type': 'loss', 'content': 0.041419606655836105, 'timestamp': '2025-09-30 22:31:00.416503', 'step': 15334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:00.459102', 'step': 15334, 'epoch': 3} {'type': 'loss', 'content': 0.05587787553668022, 'timestamp': '2025-09-30 22:31:00.465047', 'step': 15335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:00.497674', 'step': 15335, 'epoch': 3} {'type': 'loss', 'content': 0.023691369220614433, 'timestamp': '2025-09-30 22:31:00.525099', 'step': 15336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:00.555288', 'step': 15336, 'epoch': 3} {'type': 'loss', 'content': 0.03670484572649002, 'timestamp': '2025-09-30 22:31:00.559745', 'step': 15337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:00.602963', 'step': 15337, 'epoch': 3} {'type': 'loss', 'content': 0.047298990190029144, 'timestamp': '2025-09-30 22:31:00.619615', 'step': 15338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:00.697264', 'step': 15338, 'epoch': 3} {'type': 'loss', 'content': 0.05439445748925209, 'timestamp': '2025-09-30 22:31:00.701607', 'step': 15339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:00.734258', 'step': 15339, 'epoch': 3} {'type': 'loss', 'content': 0.07250872999429703, 'timestamp': '2025-09-30 22:31:00.761918', 'step': 15340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:00.797504', 'step': 15340, 'epoch': 3} {'type': 'loss', 'content': 0.06482964009046555, 'timestamp': '2025-09-30 22:31:00.804084', 'step': 15341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:00.845036', 'step': 15341, 'epoch': 3} {'type': 'loss', 'content': 0.21673347055912018, 'timestamp': '2025-09-30 22:31:00.851458', 'step': 15342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:00.886270', 'step': 15342, 'epoch': 3} {'type': 'loss', 'content': 0.08036874234676361, 'timestamp': '2025-09-30 22:31:00.891659', 'step': 15343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:00.922905', 'step': 15343, 'epoch': 3} {'type': 'loss', 'content': 0.07839469611644745, 'timestamp': '2025-09-30 22:31:00.959481', 'step': 15344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:01.001992', 'step': 15344, 'epoch': 3} {'type': 'loss', 'content': 0.04062919318675995, 'timestamp': '2025-09-30 22:31:01.015119', 'step': 15345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:01.062680', 'step': 15345, 'epoch': 3} {'type': 'loss', 'content': 0.08449646830558777, 'timestamp': '2025-09-30 22:31:01.080610', 'step': 15346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:01.132613', 'step': 15346, 'epoch': 3} {'type': 'loss', 'content': 0.07264940440654755, 'timestamp': '2025-09-30 22:31:01.137387', 'step': 15347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:01.180522', 'step': 15347, 'epoch': 3} {'type': 'loss', 'content': 0.09222820401191711, 'timestamp': '2025-09-30 22:31:01.206965', 'step': 15348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:01.252165', 'step': 15348, 'epoch': 3} {'type': 'loss', 'content': 0.05869017168879509, 'timestamp': '2025-09-30 22:31:01.256492', 'step': 15349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:01.288801', 'step': 15349, 'epoch': 3} {'type': 'loss', 'content': 0.002705744933336973, 'timestamp': '2025-09-30 22:31:01.304648', 'step': 15350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:01.336742', 'step': 15350, 'epoch': 3} {'type': 'loss', 'content': 0.08015452325344086, 'timestamp': '2025-09-30 22:31:01.340399', 'step': 15351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:01.371320', 'step': 15351, 'epoch': 3} {'type': 'loss', 'content': 0.07998711615800858, 'timestamp': '2025-09-30 22:31:01.396686', 'step': 15352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:01.441403', 'step': 15352, 'epoch': 3} {'type': 'loss', 'content': 0.0424853079020977, 'timestamp': '2025-09-30 22:31:01.447240', 'step': 15353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:01.480673', 'step': 15353, 'epoch': 3} {'type': 'loss', 'content': 0.049901675432920456, 'timestamp': '2025-09-30 22:31:01.484507', 'step': 15354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:01.516161', 'step': 15354, 'epoch': 3} {'type': 'loss', 'content': 0.08781037479639053, 'timestamp': '2025-09-30 22:31:01.521336', 'step': 15355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:01.556398', 'step': 15355, 'epoch': 3} {'type': 'loss', 'content': 0.03804071992635727, 'timestamp': '2025-09-30 22:31:01.584975', 'step': 15356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:01.619350', 'step': 15356, 'epoch': 3} {'type': 'loss', 'content': 0.1180286854505539, 'timestamp': '2025-09-30 22:31:01.624843', 'step': 15357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:01.660789', 'step': 15357, 'epoch': 3} {'type': 'loss', 'content': 0.12295147776603699, 'timestamp': '2025-09-30 22:31:01.665194', 'step': 15358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:01.720679', 'step': 15358, 'epoch': 3} {'type': 'loss', 'content': 0.03635130822658539, 'timestamp': '2025-09-30 22:31:01.733439', 'step': 15359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:01.776981', 'step': 15359, 'epoch': 3} {'type': 'loss', 'content': 0.12615123391151428, 'timestamp': '2025-09-30 22:31:01.815649', 'step': 15360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:01.856134', 'step': 15360, 'epoch': 3} {'type': 'loss', 'content': 0.07010284066200256, 'timestamp': '2025-09-30 22:31:01.859279', 'step': 15361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:01.892839', 'step': 15361, 'epoch': 3} {'type': 'loss', 'content': 0.032096363604068756, 'timestamp': '2025-09-30 22:31:01.907236', 'step': 15362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:01.940553', 'step': 15362, 'epoch': 3} {'type': 'loss', 'content': 0.07785597443580627, 'timestamp': '2025-09-30 22:31:01.945040', 'step': 15363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:01.984777', 'step': 15363, 'epoch': 3} {'type': 'loss', 'content': 0.10738682746887207, 'timestamp': '2025-09-30 22:31:02.011684', 'step': 15364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:02.044473', 'step': 15364, 'epoch': 3} {'type': 'loss', 'content': 0.11051727086305618, 'timestamp': '2025-09-30 22:31:02.051248', 'step': 15365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:02.096392', 'step': 15365, 'epoch': 3} {'type': 'loss', 'content': 0.15249581634998322, 'timestamp': '2025-09-30 22:31:02.101472', 'step': 15366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:02.134707', 'step': 15366, 'epoch': 3} {'type': 'loss', 'content': 0.10076229274272919, 'timestamp': '2025-09-30 22:31:02.138428', 'step': 15367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:02.175392', 'step': 15367, 'epoch': 3} {'type': 'loss', 'content': 0.08559416979551315, 'timestamp': '2025-09-30 22:31:02.201726', 'step': 15368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:02.246271', 'step': 15368, 'epoch': 3} {'type': 'loss', 'content': 0.10482626408338547, 'timestamp': '2025-09-30 22:31:02.253363', 'step': 15369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:02.285859', 'step': 15369, 'epoch': 3} {'type': 'loss', 'content': 0.09707437455654144, 'timestamp': '2025-09-30 22:31:02.290975', 'step': 15370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:02.331752', 'step': 15370, 'epoch': 3} {'type': 'loss', 'content': 0.07529550045728683, 'timestamp': '2025-09-30 22:31:02.347867', 'step': 15371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:02.387666', 'step': 15371, 'epoch': 3} {'type': 'loss', 'content': 0.03829332813620567, 'timestamp': '2025-09-30 22:31:02.413540', 'step': 15372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:02.468670', 'step': 15372, 'epoch': 3} {'type': 'loss', 'content': 0.08329800516366959, 'timestamp': '2025-09-30 22:31:02.472806', 'step': 15373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:02.511223', 'step': 15373, 'epoch': 3} {'type': 'loss', 'content': 0.11745947599411011, 'timestamp': '2025-09-30 22:31:02.525010', 'step': 15374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:02.571664', 'step': 15374, 'epoch': 3} {'type': 'loss', 'content': 0.11274390667676926, 'timestamp': '2025-09-30 22:31:02.575720', 'step': 15375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:02.610122', 'step': 15375, 'epoch': 3} {'type': 'loss', 'content': 0.06202206015586853, 'timestamp': '2025-09-30 22:31:02.636734', 'step': 15376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:02.669743', 'step': 15376, 'epoch': 3} {'type': 'loss', 'content': 0.10197614133358002, 'timestamp': '2025-09-30 22:31:02.683783', 'step': 15377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:02.727289', 'step': 15377, 'epoch': 3} {'type': 'loss', 'content': 0.04945682734251022, 'timestamp': '2025-09-30 22:31:02.731202', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:31:10.835857', 'step': 15378, 'epoch': 3} {'type': 'pplx', 'content': 10023.637989956822, 'timestamp': '2025-09-30 22:31:10.841445', 'step': 15378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:10.874795', 'step': 15378, 'epoch': 3} {'type': 'loss', 'content': 0.053932663053274155, 'timestamp': '2025-09-30 22:31:10.888066', 'step': 15379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:10.920491', 'step': 15379, 'epoch': 3} {'type': 'loss', 'content': 0.16209769248962402, 'timestamp': '2025-09-30 22:31:10.952389', 'step': 15380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:10.984371', 'step': 15380, 'epoch': 3} {'type': 'loss', 'content': 0.09365349262952805, 'timestamp': '2025-09-30 22:31:10.993916', 'step': 15381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.027408', 'step': 15381, 'epoch': 3} {'type': 'loss', 'content': 0.11215845495462418, 'timestamp': '2025-09-30 22:31:11.038418', 'step': 15382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:11.081366', 'step': 15382, 'epoch': 3} {'type': 'loss', 'content': 0.01493154838681221, 'timestamp': '2025-09-30 22:31:11.085230', 'step': 15383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.126429', 'step': 15383, 'epoch': 3} {'type': 'loss', 'content': 0.09940799325704575, 'timestamp': '2025-09-30 22:31:11.152603', 'step': 15384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:11.185326', 'step': 15384, 'epoch': 3} {'type': 'loss', 'content': 0.05650220066308975, 'timestamp': '2025-09-30 22:31:11.198100', 'step': 15385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.230468', 'step': 15385, 'epoch': 3} {'type': 'loss', 'content': 0.0673820748925209, 'timestamp': '2025-09-30 22:31:11.234170', 'step': 15386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.267841', 'step': 15386, 'epoch': 3} {'type': 'loss', 'content': 0.04026846960186958, 'timestamp': '2025-09-30 22:31:11.271621', 'step': 15387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.315936', 'step': 15387, 'epoch': 3} {'type': 'loss', 'content': 0.10377443581819534, 'timestamp': '2025-09-30 22:31:11.341248', 'step': 15388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.383492', 'step': 15388, 'epoch': 3} {'type': 'loss', 'content': 0.08738207072019577, 'timestamp': '2025-09-30 22:31:11.387827', 'step': 15389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.419993', 'step': 15389, 'epoch': 3} {'type': 'loss', 'content': 0.049302004277706146, 'timestamp': '2025-09-30 22:31:11.426163', 'step': 15390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:11.458169', 'step': 15390, 'epoch': 3} {'type': 'loss', 'content': 0.20675915479660034, 'timestamp': '2025-09-30 22:31:11.476447', 'step': 15391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.507479', 'step': 15391, 'epoch': 3} {'type': 'loss', 'content': 0.07710146903991699, 'timestamp': '2025-09-30 22:31:11.540032', 'step': 15392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:11.583174', 'step': 15392, 'epoch': 3} {'type': 'loss', 'content': 0.04864771291613579, 'timestamp': '2025-09-30 22:31:11.589056', 'step': 15393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:11.626386', 'step': 15393, 'epoch': 3} {'type': 'loss', 'content': 0.06919262558221817, 'timestamp': '2025-09-30 22:31:11.630536', 'step': 15394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:11.669677', 'step': 15394, 'epoch': 3} {'type': 'loss', 'content': 0.09037528187036514, 'timestamp': '2025-09-30 22:31:11.674951', 'step': 15395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.722655', 'step': 15395, 'epoch': 3} {'type': 'loss', 'content': 0.03923584893345833, 'timestamp': '2025-09-30 22:31:11.758650', 'step': 15396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:11.806089', 'step': 15396, 'epoch': 3} {'type': 'loss', 'content': 0.09467368572950363, 'timestamp': '2025-09-30 22:31:11.821752', 'step': 15397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:11.854844', 'step': 15397, 'epoch': 3} {'type': 'loss', 'content': 0.05722297355532646, 'timestamp': '2025-09-30 22:31:11.857805', 'step': 15398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:11.901389', 'step': 15398, 'epoch': 3} {'type': 'loss', 'content': 0.0866754800081253, 'timestamp': '2025-09-30 22:31:11.905313', 'step': 15399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:11.939613', 'step': 15399, 'epoch': 3} {'type': 'loss', 'content': 0.1837567239999771, 'timestamp': '2025-09-30 22:31:11.965486', 'step': 15400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:12.017254', 'step': 15400, 'epoch': 3} {'type': 'loss', 'content': 0.11740043014287949, 'timestamp': '2025-09-30 22:31:12.026394', 'step': 15401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.070348', 'step': 15401, 'epoch': 3} {'type': 'loss', 'content': 0.09978055953979492, 'timestamp': '2025-09-30 22:31:12.087624', 'step': 15402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:12.120734', 'step': 15402, 'epoch': 3} {'type': 'loss', 'content': 0.05724561586976051, 'timestamp': '2025-09-30 22:31:12.138627', 'step': 15403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:12.182516', 'step': 15403, 'epoch': 3} {'type': 'loss', 'content': 0.06477096676826477, 'timestamp': '2025-09-30 22:31:12.210039', 'step': 15404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:12.243274', 'step': 15404, 'epoch': 3} {'type': 'loss', 'content': 0.05716642364859581, 'timestamp': '2025-09-30 22:31:12.258893', 'step': 15405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.300930', 'step': 15405, 'epoch': 3} {'type': 'loss', 'content': 0.03162142261862755, 'timestamp': '2025-09-30 22:31:12.304986', 'step': 15406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:12.348980', 'step': 15406, 'epoch': 3} {'type': 'loss', 'content': 0.028437988832592964, 'timestamp': '2025-09-30 22:31:12.367474', 'step': 15407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.399616', 'step': 15407, 'epoch': 3} {'type': 'loss', 'content': 0.12025302648544312, 'timestamp': '2025-09-30 22:31:12.425709', 'step': 15408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:12.459053', 'step': 15408, 'epoch': 3} {'type': 'loss', 'content': 0.1086622029542923, 'timestamp': '2025-09-30 22:31:12.465210', 'step': 15409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.509368', 'step': 15409, 'epoch': 3} {'type': 'loss', 'content': 0.08675581961870193, 'timestamp': '2025-09-30 22:31:12.526737', 'step': 15410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:12.559632', 'step': 15410, 'epoch': 3} {'type': 'loss', 'content': 0.10605369508266449, 'timestamp': '2025-09-30 22:31:12.577935', 'step': 15411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.614320', 'step': 15411, 'epoch': 3} {'type': 'loss', 'content': 0.04602199047803879, 'timestamp': '2025-09-30 22:31:12.639480', 'step': 15412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.672014', 'step': 15412, 'epoch': 3} {'type': 'loss', 'content': 0.04044424369931221, 'timestamp': '2025-09-30 22:31:12.677425', 'step': 15413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:12.709341', 'step': 15413, 'epoch': 3} {'type': 'loss', 'content': 0.08530563116073608, 'timestamp': '2025-09-30 22:31:12.714260', 'step': 15414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.753615', 'step': 15414, 'epoch': 3} {'type': 'loss', 'content': 0.10339223593473434, 'timestamp': '2025-09-30 22:31:12.756545', 'step': 15415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:12.787096', 'step': 15415, 'epoch': 3} {'type': 'loss', 'content': 0.1311165988445282, 'timestamp': '2025-09-30 22:31:12.812446', 'step': 15416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:12.844292', 'step': 15416, 'epoch': 3} {'type': 'loss', 'content': 0.05867290496826172, 'timestamp': '2025-09-30 22:31:12.847428', 'step': 15417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:12.880733', 'step': 15417, 'epoch': 3} {'type': 'loss', 'content': 0.055050402879714966, 'timestamp': '2025-09-30 22:31:12.894180', 'step': 15418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:12.933330', 'step': 15418, 'epoch': 3} {'type': 'loss', 'content': 0.028847992420196533, 'timestamp': '2025-09-30 22:31:12.937829', 'step': 15419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:12.969353', 'step': 15419, 'epoch': 3} {'type': 'loss', 'content': 0.0343707799911499, 'timestamp': '2025-09-30 22:31:13.008271', 'step': 15420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:13.039808', 'step': 15420, 'epoch': 3} {'type': 'loss', 'content': 0.04261887073516846, 'timestamp': '2025-09-30 22:31:13.043784', 'step': 15421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.076710', 'step': 15421, 'epoch': 3} {'type': 'loss', 'content': 0.10466853529214859, 'timestamp': '2025-09-30 22:31:13.081812', 'step': 15422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:13.113961', 'step': 15422, 'epoch': 3} {'type': 'loss', 'content': 0.05476291850209236, 'timestamp': '2025-09-30 22:31:13.119934', 'step': 15423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:13.158090', 'step': 15423, 'epoch': 3} {'type': 'loss', 'content': 0.05023942515254021, 'timestamp': '2025-09-30 22:31:13.182307', 'step': 15424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:13.214058', 'step': 15424, 'epoch': 3} {'type': 'loss', 'content': 0.037808287888765335, 'timestamp': '2025-09-30 22:31:13.217459', 'step': 15425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:13.258691', 'step': 15425, 'epoch': 3} {'type': 'loss', 'content': 0.07628215849399567, 'timestamp': '2025-09-30 22:31:13.264100', 'step': 15426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.296166', 'step': 15426, 'epoch': 3} {'type': 'loss', 'content': 0.08414195477962494, 'timestamp': '2025-09-30 22:31:13.305926', 'step': 15427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.344270', 'step': 15427, 'epoch': 3} {'type': 'loss', 'content': 0.08277533203363419, 'timestamp': '2025-09-30 22:31:13.370792', 'step': 15428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:13.409245', 'step': 15428, 'epoch': 3} {'type': 'loss', 'content': 0.04053342342376709, 'timestamp': '2025-09-30 22:31:13.413246', 'step': 15429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.446414', 'step': 15429, 'epoch': 3} {'type': 'loss', 'content': 0.0394117571413517, 'timestamp': '2025-09-30 22:31:13.449747', 'step': 15430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:13.482353', 'step': 15430, 'epoch': 3} {'type': 'loss', 'content': 0.02263796702027321, 'timestamp': '2025-09-30 22:31:13.490713', 'step': 15431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.524580', 'step': 15431, 'epoch': 3} {'type': 'loss', 'content': 0.04216155782341957, 'timestamp': '2025-09-30 22:31:13.549929', 'step': 15432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.581786', 'step': 15432, 'epoch': 3} {'type': 'loss', 'content': 0.10312830656766891, 'timestamp': '2025-09-30 22:31:13.585242', 'step': 15433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:13.623634', 'step': 15433, 'epoch': 3} {'type': 'loss', 'content': 0.08417514711618423, 'timestamp': '2025-09-30 22:31:13.628720', 'step': 15434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:13.660843', 'step': 15434, 'epoch': 3} {'type': 'loss', 'content': 0.046152278780937195, 'timestamp': '2025-09-30 22:31:13.671015', 'step': 15435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:13.702559', 'step': 15435, 'epoch': 3} {'type': 'loss', 'content': 0.05905509367585182, 'timestamp': '2025-09-30 22:31:13.728994', 'step': 15436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.761390', 'step': 15436, 'epoch': 3} {'type': 'loss', 'content': 0.09399916231632233, 'timestamp': '2025-09-30 22:31:13.766115', 'step': 15437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:13.811226', 'step': 15437, 'epoch': 3} {'type': 'loss', 'content': 0.05633040517568588, 'timestamp': '2025-09-30 22:31:13.821614', 'step': 15438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:13.855373', 'step': 15438, 'epoch': 3} {'type': 'loss', 'content': 0.1280764937400818, 'timestamp': '2025-09-30 22:31:13.860465', 'step': 15439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:13.901503', 'step': 15439, 'epoch': 3} {'type': 'loss', 'content': 0.04436807334423065, 'timestamp': '2025-09-30 22:31:13.938729', 'step': 15440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:13.974672', 'step': 15440, 'epoch': 3} {'type': 'loss', 'content': 0.04687827080488205, 'timestamp': '2025-09-30 22:31:13.978568', 'step': 15441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.028131', 'step': 15441, 'epoch': 3} {'type': 'loss', 'content': 0.15229767560958862, 'timestamp': '2025-09-30 22:31:14.032212', 'step': 15442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.063092', 'step': 15442, 'epoch': 3} {'type': 'loss', 'content': 0.08541735261678696, 'timestamp': '2025-09-30 22:31:14.075709', 'step': 15443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:14.107893', 'step': 15443, 'epoch': 3} {'type': 'loss', 'content': 0.01952289789915085, 'timestamp': '2025-09-30 22:31:14.140451', 'step': 15444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:14.170936', 'step': 15444, 'epoch': 3} {'type': 'loss', 'content': 0.08489872515201569, 'timestamp': '2025-09-30 22:31:14.175752', 'step': 15445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:14.208627', 'step': 15445, 'epoch': 3} {'type': 'loss', 'content': 0.1071418821811676, 'timestamp': '2025-09-30 22:31:14.220829', 'step': 15446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:14.252339', 'step': 15446, 'epoch': 3} {'type': 'loss', 'content': 0.10972300916910172, 'timestamp': '2025-09-30 22:31:14.260473', 'step': 15447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:14.292813', 'step': 15447, 'epoch': 3} {'type': 'loss', 'content': 0.08796076476573944, 'timestamp': '2025-09-30 22:31:14.318194', 'step': 15448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:14.351164', 'step': 15448, 'epoch': 3} {'type': 'loss', 'content': 0.10531973838806152, 'timestamp': '2025-09-30 22:31:14.356884', 'step': 15449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.387617', 'step': 15449, 'epoch': 3} {'type': 'loss', 'content': 0.09997626394033432, 'timestamp': '2025-09-30 22:31:14.402416', 'step': 15450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:14.434194', 'step': 15450, 'epoch': 3} {'type': 'loss', 'content': 0.06987781077623367, 'timestamp': '2025-09-30 22:31:14.437908', 'step': 15451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:14.481661', 'step': 15451, 'epoch': 3} {'type': 'loss', 'content': 0.04272155836224556, 'timestamp': '2025-09-30 22:31:14.509763', 'step': 15452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:31:14.542790', 'step': 15452, 'epoch': 3} {'type': 'loss', 'content': 0.08260827511548996, 'timestamp': '2025-09-30 22:31:14.558576', 'step': 15453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:14.591469', 'step': 15453, 'epoch': 3} {'type': 'loss', 'content': 0.050751183182001114, 'timestamp': '2025-09-30 22:31:14.595721', 'step': 15454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.628340', 'step': 15454, 'epoch': 3} {'type': 'loss', 'content': 0.08251569420099258, 'timestamp': '2025-09-30 22:31:14.642219', 'step': 15455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:14.690471', 'step': 15455, 'epoch': 3} {'type': 'loss', 'content': 0.13278773427009583, 'timestamp': '2025-09-30 22:31:14.715011', 'step': 15456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.753236', 'step': 15456, 'epoch': 3} {'type': 'loss', 'content': 0.03182579576969147, 'timestamp': '2025-09-30 22:31:14.758300', 'step': 15457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.791017', 'step': 15457, 'epoch': 3} {'type': 'loss', 'content': 0.04406189173460007, 'timestamp': '2025-09-30 22:31:14.800395', 'step': 15458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.833614', 'step': 15458, 'epoch': 3} {'type': 'loss', 'content': 0.12061035633087158, 'timestamp': '2025-09-30 22:31:14.837271', 'step': 15459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:14.870074', 'step': 15459, 'epoch': 3} {'type': 'loss', 'content': 0.05578291788697243, 'timestamp': '2025-09-30 22:31:14.903771', 'step': 15460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.948713', 'step': 15460, 'epoch': 3} {'type': 'loss', 'content': 0.05260969698429108, 'timestamp': '2025-09-30 22:31:14.952401', 'step': 15461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:14.994452', 'step': 15461, 'epoch': 3} {'type': 'loss', 'content': 0.06986048072576523, 'timestamp': '2025-09-30 22:31:15.010286', 'step': 15462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.053104', 'step': 15462, 'epoch': 3} {'type': 'loss', 'content': 0.045494284480810165, 'timestamp': '2025-09-30 22:31:15.059208', 'step': 15463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:15.093577', 'step': 15463, 'epoch': 3} {'type': 'loss', 'content': 0.02601601555943489, 'timestamp': '2025-09-30 22:31:15.118014', 'step': 15464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:15.173501', 'step': 15464, 'epoch': 3} {'type': 'loss', 'content': 0.07005489617586136, 'timestamp': '2025-09-30 22:31:15.176834', 'step': 15465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.214355', 'step': 15465, 'epoch': 3} {'type': 'loss', 'content': 0.05766046792268753, 'timestamp': '2025-09-30 22:31:15.219779', 'step': 15466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:15.253745', 'step': 15466, 'epoch': 3} {'type': 'loss', 'content': 0.05508745461702347, 'timestamp': '2025-09-30 22:31:15.268229', 'step': 15467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:15.300622', 'step': 15467, 'epoch': 3} {'type': 'loss', 'content': 0.11415557563304901, 'timestamp': '2025-09-30 22:31:15.327276', 'step': 15468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:15.358656', 'step': 15468, 'epoch': 3} {'type': 'loss', 'content': 0.09050449728965759, 'timestamp': '2025-09-30 22:31:15.362771', 'step': 15469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.394771', 'step': 15469, 'epoch': 3} {'type': 'loss', 'content': 0.052189022302627563, 'timestamp': '2025-09-30 22:31:15.397468', 'step': 15470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.432276', 'step': 15470, 'epoch': 3} {'type': 'loss', 'content': 0.051339246332645416, 'timestamp': '2025-09-30 22:31:15.437596', 'step': 15471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.470998', 'step': 15471, 'epoch': 3} {'type': 'loss', 'content': 0.07942165434360504, 'timestamp': '2025-09-30 22:31:15.505236', 'step': 15472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:15.535321', 'step': 15472, 'epoch': 3} {'type': 'loss', 'content': 0.06543644517660141, 'timestamp': '2025-09-30 22:31:15.544513', 'step': 15473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:15.584049', 'step': 15473, 'epoch': 3} {'type': 'loss', 'content': 0.0481584370136261, 'timestamp': '2025-09-30 22:31:15.595720', 'step': 15474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.636749', 'step': 15474, 'epoch': 3} {'type': 'loss', 'content': 0.07942453771829605, 'timestamp': '2025-09-30 22:31:15.640384', 'step': 15475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.672865', 'step': 15475, 'epoch': 3} {'type': 'loss', 'content': 0.06673786044120789, 'timestamp': '2025-09-30 22:31:15.708035', 'step': 15476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.741959', 'step': 15476, 'epoch': 3} {'type': 'loss', 'content': 0.04223279282450676, 'timestamp': '2025-09-30 22:31:15.744627', 'step': 15477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:15.784758', 'step': 15477, 'epoch': 3} {'type': 'loss', 'content': 0.03112972155213356, 'timestamp': '2025-09-30 22:31:15.793903', 'step': 15478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.836004', 'step': 15478, 'epoch': 3} {'type': 'loss', 'content': 0.07616838067770004, 'timestamp': '2025-09-30 22:31:15.848471', 'step': 15479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:15.879501', 'step': 15479, 'epoch': 3} {'type': 'loss', 'content': 0.06325910240411758, 'timestamp': '2025-09-30 22:31:15.907118', 'step': 15480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:15.951542', 'step': 15480, 'epoch': 3} {'type': 'loss', 'content': 0.10239992290735245, 'timestamp': '2025-09-30 22:31:15.955284', 'step': 15481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:15.999433', 'step': 15481, 'epoch': 3} {'type': 'loss', 'content': 0.0781756043434143, 'timestamp': '2025-09-30 22:31:16.011213', 'step': 15482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:16.051032', 'step': 15482, 'epoch': 3} {'type': 'loss', 'content': 0.07800254970788956, 'timestamp': '2025-09-30 22:31:16.054145', 'step': 15483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:16.093889', 'step': 15483, 'epoch': 3} {'type': 'loss', 'content': 0.15058137476444244, 'timestamp': '2025-09-30 22:31:16.135251', 'step': 15484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:31:16.168163', 'step': 15484, 'epoch': 3} {'type': 'loss', 'content': 0.10180419683456421, 'timestamp': '2025-09-30 22:31:16.183712', 'step': 15485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.217457', 'step': 15485, 'epoch': 3} {'type': 'loss', 'content': 0.06994578242301941, 'timestamp': '2025-09-30 22:31:16.221997', 'step': 15486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:16.264647', 'step': 15486, 'epoch': 3} {'type': 'loss', 'content': 0.049522075802087784, 'timestamp': '2025-09-30 22:31:16.267386', 'step': 15487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:16.318742', 'step': 15487, 'epoch': 3} {'type': 'loss', 'content': 0.08509048074483871, 'timestamp': '2025-09-30 22:31:16.344478', 'step': 15488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.388552', 'step': 15488, 'epoch': 3} {'type': 'loss', 'content': 0.06018887832760811, 'timestamp': '2025-09-30 22:31:16.404201', 'step': 15489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.436040', 'step': 15489, 'epoch': 3} {'type': 'loss', 'content': 0.08777749538421631, 'timestamp': '2025-09-30 22:31:16.440008', 'step': 15490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:16.471004', 'step': 15490, 'epoch': 3} {'type': 'loss', 'content': 0.06363099068403244, 'timestamp': '2025-09-30 22:31:16.483412', 'step': 15491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.532615', 'step': 15491, 'epoch': 3} {'type': 'loss', 'content': 0.02043367736041546, 'timestamp': '2025-09-30 22:31:16.558098', 'step': 15492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.597347', 'step': 15492, 'epoch': 3} {'type': 'loss', 'content': 0.05741129443049431, 'timestamp': '2025-09-30 22:31:16.611081', 'step': 15493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:16.644632', 'step': 15493, 'epoch': 3} {'type': 'loss', 'content': 0.1827024519443512, 'timestamp': '2025-09-30 22:31:16.649247', 'step': 15494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.680796', 'step': 15494, 'epoch': 3} {'type': 'loss', 'content': 0.024149606004357338, 'timestamp': '2025-09-30 22:31:16.686689', 'step': 15495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:16.721753', 'step': 15495, 'epoch': 3} {'type': 'loss', 'content': 0.052045632153749466, 'timestamp': '2025-09-30 22:31:16.756097', 'step': 15496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:16.798395', 'step': 15496, 'epoch': 3} {'type': 'loss', 'content': 0.017283620312809944, 'timestamp': '2025-09-30 22:31:16.811658', 'step': 15497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:16.859583', 'step': 15497, 'epoch': 3} {'type': 'loss', 'content': 0.10738550871610641, 'timestamp': '2025-09-30 22:31:16.862999', 'step': 15498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:16.906977', 'step': 15498, 'epoch': 3} {'type': 'loss', 'content': 0.12070482224225998, 'timestamp': '2025-09-30 22:31:16.911570', 'step': 15499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:16.943624', 'step': 15499, 'epoch': 3} {'type': 'loss', 'content': 0.13511140644550323, 'timestamp': '2025-09-30 22:31:16.967633', 'step': 15500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 15500', 'timestamp': '2025-09-30 22:31:22.275631', 'step': 15500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:22.318119', 'step': 15500, 'epoch': 3} {'type': 'loss', 'content': 0.054729681462049484, 'timestamp': '2025-09-30 22:31:22.323864', 'step': 15501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:22.392069', 'step': 15501, 'epoch': 3} {'type': 'loss', 'content': 0.04975195974111557, 'timestamp': '2025-09-30 22:31:22.394974', 'step': 15502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:22.450422', 'step': 15502, 'epoch': 3} {'type': 'loss', 'content': 0.05311770364642143, 'timestamp': '2025-09-30 22:31:22.453119', 'step': 15503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:22.513180', 'step': 15503, 'epoch': 3} {'type': 'loss', 'content': 0.1511741578578949, 'timestamp': '2025-09-30 22:31:22.548976', 'step': 15504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:22.612490', 'step': 15504, 'epoch': 3} {'type': 'loss', 'content': 0.08598948270082474, 'timestamp': '2025-09-30 22:31:22.627508', 'step': 15505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:31:22.700362', 'step': 15505, 'epoch': 3} {'type': 'loss', 'content': 0.08452264964580536, 'timestamp': '2025-09-30 22:31:22.710665', 'step': 15506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:22.787721', 'step': 15506, 'epoch': 3} {'type': 'loss', 'content': 0.08644989877939224, 'timestamp': '2025-09-30 22:31:22.791432', 'step': 15507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:22.863685', 'step': 15507, 'epoch': 3} {'type': 'loss', 'content': 0.09041909873485565, 'timestamp': '2025-09-30 22:31:22.896271', 'step': 15508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:22.944098', 'step': 15508, 'epoch': 3} {'type': 'loss', 'content': 0.18187451362609863, 'timestamp': '2025-09-30 22:31:22.952796', 'step': 15509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:22.991912', 'step': 15509, 'epoch': 3} {'type': 'loss', 'content': 0.06912028789520264, 'timestamp': '2025-09-30 22:31:22.997642', 'step': 15510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:23.030284', 'step': 15510, 'epoch': 3} {'type': 'loss', 'content': 0.083115354180336, 'timestamp': '2025-09-30 22:31:23.041501', 'step': 15511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:31:23.074181', 'step': 15511, 'epoch': 3} {'type': 'loss', 'content': 0.09279374033212662, 'timestamp': '2025-09-30 22:31:23.117522', 'step': 15512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:23.166272', 'step': 15512, 'epoch': 3} {'type': 'loss', 'content': 0.10632248222827911, 'timestamp': '2025-09-30 22:31:23.169578', 'step': 15513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:23.202160', 'step': 15513, 'epoch': 3} {'type': 'loss', 'content': 0.059740033000707626, 'timestamp': '2025-09-30 22:31:23.206546', 'step': 15514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:23.240330', 'step': 15514, 'epoch': 3} {'type': 'loss', 'content': 0.09026163071393967, 'timestamp': '2025-09-30 22:31:23.244388', 'step': 15515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:23.285681', 'step': 15515, 'epoch': 3} {'type': 'loss', 'content': 0.10563905537128448, 'timestamp': '2025-09-30 22:31:23.312736', 'step': 15516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:23.352902', 'step': 15516, 'epoch': 3} {'type': 'loss', 'content': 0.049714766442775726, 'timestamp': '2025-09-30 22:31:23.357284', 'step': 15517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:23.390460', 'step': 15517, 'epoch': 3} {'type': 'loss', 'content': 0.09814497083425522, 'timestamp': '2025-09-30 22:31:23.393890', 'step': 15518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:23.427646', 'step': 15518, 'epoch': 3} {'type': 'loss', 'content': 0.05149205029010773, 'timestamp': '2025-09-30 22:31:23.434152', 'step': 15519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:23.466965', 'step': 15519, 'epoch': 3} {'type': 'loss', 'content': 0.09034339338541031, 'timestamp': '2025-09-30 22:31:23.493449', 'step': 15520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:23.526311', 'step': 15520, 'epoch': 3} {'type': 'loss', 'content': 0.02304023690521717, 'timestamp': '2025-09-30 22:31:23.530898', 'step': 15521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:23.563724', 'step': 15521, 'epoch': 3} {'type': 'loss', 'content': 0.1414060741662979, 'timestamp': '2025-09-30 22:31:23.568582', 'step': 15522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:23.604327', 'step': 15522, 'epoch': 3} {'type': 'loss', 'content': 0.08650033921003342, 'timestamp': '2025-09-30 22:31:23.622694', 'step': 15523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:23.656175', 'step': 15523, 'epoch': 3} {'type': 'loss', 'content': 0.08510484546422958, 'timestamp': '2025-09-30 22:31:23.684054', 'step': 15524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:23.717874', 'step': 15524, 'epoch': 3} {'type': 'loss', 'content': 0.08034110069274902, 'timestamp': '2025-09-30 22:31:23.724218', 'step': 15525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:23.767791', 'step': 15525, 'epoch': 3} {'type': 'loss', 'content': 0.047657884657382965, 'timestamp': '2025-09-30 22:31:23.772894', 'step': 15526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:23.807479', 'step': 15526, 'epoch': 3} {'type': 'loss', 'content': 0.02649991773068905, 'timestamp': '2025-09-30 22:31:23.822869', 'step': 15527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:23.865289', 'step': 15527, 'epoch': 3} {'type': 'loss', 'content': 0.08327517658472061, 'timestamp': '2025-09-30 22:31:23.904079', 'step': 15528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:23.938035', 'step': 15528, 'epoch': 3} {'type': 'loss', 'content': 0.06841979175806046, 'timestamp': '2025-09-30 22:31:23.943835', 'step': 15529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:23.990464', 'step': 15529, 'epoch': 3} {'type': 'loss', 'content': 0.08813488483428955, 'timestamp': '2025-09-30 22:31:23.995637', 'step': 15530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.028173', 'step': 15530, 'epoch': 3} {'type': 'loss', 'content': 0.0960949957370758, 'timestamp': '2025-09-30 22:31:24.032861', 'step': 15531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.064440', 'step': 15531, 'epoch': 3} {'type': 'loss', 'content': 0.07084185630083084, 'timestamp': '2025-09-30 22:31:24.090940', 'step': 15532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:24.139747', 'step': 15532, 'epoch': 3} {'type': 'loss', 'content': 0.06367272138595581, 'timestamp': '2025-09-30 22:31:24.144567', 'step': 15533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:24.187647', 'step': 15533, 'epoch': 3} {'type': 'loss', 'content': 0.06440945714712143, 'timestamp': '2025-09-30 22:31:24.192505', 'step': 15534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.224507', 'step': 15534, 'epoch': 3} {'type': 'loss', 'content': 0.14570975303649902, 'timestamp': '2025-09-30 22:31:24.229358', 'step': 15535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:24.261769', 'step': 15535, 'epoch': 3} {'type': 'loss', 'content': 0.11365901678800583, 'timestamp': '2025-09-30 22:31:24.298138', 'step': 15536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.331157', 'step': 15536, 'epoch': 3} {'type': 'loss', 'content': 0.1034715473651886, 'timestamp': '2025-09-30 22:31:24.335914', 'step': 15537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:24.383780', 'step': 15537, 'epoch': 3} {'type': 'loss', 'content': 0.0861147865653038, 'timestamp': '2025-09-30 22:31:24.387719', 'step': 15538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.419769', 'step': 15538, 'epoch': 3} {'type': 'loss', 'content': 0.12679006159305573, 'timestamp': '2025-09-30 22:31:24.432992', 'step': 15539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:24.472593', 'step': 15539, 'epoch': 3} {'type': 'loss', 'content': 0.113809734582901, 'timestamp': '2025-09-30 22:31:24.497247', 'step': 15540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.528601', 'step': 15540, 'epoch': 3} {'type': 'loss', 'content': 0.07004711031913757, 'timestamp': '2025-09-30 22:31:24.532699', 'step': 15541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:24.574851', 'step': 15541, 'epoch': 3} {'type': 'loss', 'content': 0.12043752521276474, 'timestamp': '2025-09-30 22:31:24.585931', 'step': 15542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:24.620315', 'step': 15542, 'epoch': 3} {'type': 'loss', 'content': 0.10546049475669861, 'timestamp': '2025-09-30 22:31:24.625455', 'step': 15543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:24.657665', 'step': 15543, 'epoch': 3} {'type': 'loss', 'content': 0.058463532477617264, 'timestamp': '2025-09-30 22:31:24.683271', 'step': 15544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:24.726594', 'step': 15544, 'epoch': 3} {'type': 'loss', 'content': 0.04611640423536301, 'timestamp': '2025-09-30 22:31:24.730594', 'step': 15545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:24.772908', 'step': 15545, 'epoch': 3} {'type': 'loss', 'content': 0.11521086096763611, 'timestamp': '2025-09-30 22:31:24.777404', 'step': 15546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:24.810689', 'step': 15546, 'epoch': 3} {'type': 'loss', 'content': 0.17079223692417145, 'timestamp': '2025-09-30 22:31:24.815638', 'step': 15547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:24.858064', 'step': 15547, 'epoch': 3} {'type': 'loss', 'content': 0.0761198028922081, 'timestamp': '2025-09-30 22:31:24.884019', 'step': 15548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.929090', 'step': 15548, 'epoch': 3} {'type': 'loss', 'content': 0.13458876311779022, 'timestamp': '2025-09-30 22:31:24.933744', 'step': 15549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:24.965219', 'step': 15549, 'epoch': 3} {'type': 'loss', 'content': 0.03572726622223854, 'timestamp': '2025-09-30 22:31:24.971242', 'step': 15550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.005104', 'step': 15550, 'epoch': 3} {'type': 'loss', 'content': 0.06856359541416168, 'timestamp': '2025-09-30 22:31:25.009620', 'step': 15551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.044809', 'step': 15551, 'epoch': 3} {'type': 'loss', 'content': 0.06209775060415268, 'timestamp': '2025-09-30 22:31:25.072325', 'step': 15552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.105679', 'step': 15552, 'epoch': 3} {'type': 'loss', 'content': 0.11548270285129547, 'timestamp': '2025-09-30 22:31:25.111791', 'step': 15553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.156486', 'step': 15553, 'epoch': 3} {'type': 'loss', 'content': 0.10994231700897217, 'timestamp': '2025-09-30 22:31:25.162366', 'step': 15554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:25.194688', 'step': 15554, 'epoch': 3} {'type': 'loss', 'content': 0.05451570823788643, 'timestamp': '2025-09-30 22:31:25.198635', 'step': 15555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:25.230485', 'step': 15555, 'epoch': 3} {'type': 'loss', 'content': 0.08542141318321228, 'timestamp': '2025-09-30 22:31:25.256608', 'step': 15556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.303406', 'step': 15556, 'epoch': 3} {'type': 'loss', 'content': 0.08700477331876755, 'timestamp': '2025-09-30 22:31:25.308436', 'step': 15557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:25.341334', 'step': 15557, 'epoch': 3} {'type': 'loss', 'content': 0.10199949890375137, 'timestamp': '2025-09-30 22:31:25.357699', 'step': 15558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.389599', 'step': 15558, 'epoch': 3} {'type': 'loss', 'content': 0.15032170712947845, 'timestamp': '2025-09-30 22:31:25.394240', 'step': 15559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:25.427824', 'step': 15559, 'epoch': 3} {'type': 'loss', 'content': 0.03246127814054489, 'timestamp': '2025-09-30 22:31:25.453454', 'step': 15560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.486097', 'step': 15560, 'epoch': 3} {'type': 'loss', 'content': 0.06965538114309311, 'timestamp': '2025-09-30 22:31:25.504747', 'step': 15561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.549201', 'step': 15561, 'epoch': 3} {'type': 'loss', 'content': 0.022364187985658646, 'timestamp': '2025-09-30 22:31:25.559733', 'step': 15562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.598831', 'step': 15562, 'epoch': 3} {'type': 'loss', 'content': 0.09422536194324493, 'timestamp': '2025-09-30 22:31:25.603371', 'step': 15563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.635678', 'step': 15563, 'epoch': 3} {'type': 'loss', 'content': 0.1021375060081482, 'timestamp': '2025-09-30 22:31:25.660663', 'step': 15564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.692916', 'step': 15564, 'epoch': 3} {'type': 'loss', 'content': 0.08568296581506729, 'timestamp': '2025-09-30 22:31:25.707914', 'step': 15565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.751098', 'step': 15565, 'epoch': 3} {'type': 'loss', 'content': 0.04804432764649391, 'timestamp': '2025-09-30 22:31:25.760358', 'step': 15566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.793071', 'step': 15566, 'epoch': 3} {'type': 'loss', 'content': 0.1627202033996582, 'timestamp': '2025-09-30 22:31:25.811220', 'step': 15567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.856467', 'step': 15567, 'epoch': 3} {'type': 'loss', 'content': 0.1364404559135437, 'timestamp': '2025-09-30 22:31:25.881782', 'step': 15568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:25.917458', 'step': 15568, 'epoch': 3} {'type': 'loss', 'content': 0.120889812707901, 'timestamp': '2025-09-30 22:31:25.921370', 'step': 15569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:25.969526', 'step': 15569, 'epoch': 3} {'type': 'loss', 'content': 0.08122119307518005, 'timestamp': '2025-09-30 22:31:25.973704', 'step': 15570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:26.006385', 'step': 15570, 'epoch': 3} {'type': 'loss', 'content': 0.07508527487516403, 'timestamp': '2025-09-30 22:31:26.021838', 'step': 15571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:26.059171', 'step': 15571, 'epoch': 3} {'type': 'loss', 'content': 0.04268049821257591, 'timestamp': '2025-09-30 22:31:26.083399', 'step': 15572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:26.130475', 'step': 15572, 'epoch': 3} {'type': 'loss', 'content': 0.085859015583992, 'timestamp': '2025-09-30 22:31:26.135220', 'step': 15573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.194745', 'step': 15573, 'epoch': 3} {'type': 'loss', 'content': 0.06371355056762695, 'timestamp': '2025-09-30 22:31:26.198876', 'step': 15574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:26.230542', 'step': 15574, 'epoch': 3} {'type': 'loss', 'content': 0.16146713495254517, 'timestamp': '2025-09-30 22:31:26.242935', 'step': 15575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.274671', 'step': 15575, 'epoch': 3} {'type': 'loss', 'content': 0.046668726950883865, 'timestamp': '2025-09-30 22:31:26.299847', 'step': 15576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:26.329463', 'step': 15576, 'epoch': 3} {'type': 'loss', 'content': 0.06660979241132736, 'timestamp': '2025-09-30 22:31:26.338976', 'step': 15577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.373399', 'step': 15577, 'epoch': 3} {'type': 'loss', 'content': 0.07661096006631851, 'timestamp': '2025-09-30 22:31:26.376475', 'step': 15578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:26.407452', 'step': 15578, 'epoch': 3} {'type': 'loss', 'content': 0.06218169629573822, 'timestamp': '2025-09-30 22:31:26.410649', 'step': 15579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.442289', 'step': 15579, 'epoch': 3} {'type': 'loss', 'content': 0.08175697177648544, 'timestamp': '2025-09-30 22:31:26.466822', 'step': 15580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.499269', 'step': 15580, 'epoch': 3} {'type': 'loss', 'content': 0.15353833138942719, 'timestamp': '2025-09-30 22:31:26.503100', 'step': 15581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.536570', 'step': 15581, 'epoch': 3} {'type': 'loss', 'content': 0.11065928637981415, 'timestamp': '2025-09-30 22:31:26.543766', 'step': 15582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.579087', 'step': 15582, 'epoch': 3} {'type': 'loss', 'content': 0.1501350849866867, 'timestamp': '2025-09-30 22:31:26.592081', 'step': 15583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.625343', 'step': 15583, 'epoch': 3} {'type': 'loss', 'content': 0.06661484390497208, 'timestamp': '2025-09-30 22:31:26.665117', 'step': 15584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:26.699360', 'step': 15584, 'epoch': 3} {'type': 'loss', 'content': 0.09038426727056503, 'timestamp': '2025-09-30 22:31:26.704179', 'step': 15585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:26.738494', 'step': 15585, 'epoch': 3} {'type': 'loss', 'content': 0.07893988490104675, 'timestamp': '2025-09-30 22:31:26.744736', 'step': 15586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:26.779737', 'step': 15586, 'epoch': 3} {'type': 'loss', 'content': 0.13025020062923431, 'timestamp': '2025-09-30 22:31:26.797753', 'step': 15587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:26.830945', 'step': 15587, 'epoch': 3} {'type': 'loss', 'content': 0.09913772344589233, 'timestamp': '2025-09-30 22:31:26.868132', 'step': 15588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:26.917443', 'step': 15588, 'epoch': 3} {'type': 'loss', 'content': 0.17433950304985046, 'timestamp': '2025-09-30 22:31:26.933470', 'step': 15589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:26.975875', 'step': 15589, 'epoch': 3} {'type': 'loss', 'content': 0.07448375225067139, 'timestamp': '2025-09-30 22:31:26.980209', 'step': 15590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.026007', 'step': 15590, 'epoch': 3} {'type': 'loss', 'content': 0.10251377522945404, 'timestamp': '2025-09-30 22:31:27.039562', 'step': 15591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:27.082275', 'step': 15591, 'epoch': 3} {'type': 'loss', 'content': 0.04911204054951668, 'timestamp': '2025-09-30 22:31:27.108299', 'step': 15592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.141702', 'step': 15592, 'epoch': 3} {'type': 'loss', 'content': 0.07733398675918579, 'timestamp': '2025-09-30 22:31:27.147613', 'step': 15593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.180172', 'step': 15593, 'epoch': 3} {'type': 'loss', 'content': 0.0798381119966507, 'timestamp': '2025-09-30 22:31:27.182960', 'step': 15594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.225407', 'step': 15594, 'epoch': 3} {'type': 'loss', 'content': 0.10191839933395386, 'timestamp': '2025-09-30 22:31:27.230762', 'step': 15595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:27.262484', 'step': 15595, 'epoch': 3} {'type': 'loss', 'content': 0.01930791698396206, 'timestamp': '2025-09-30 22:31:27.287650', 'step': 15596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.321807', 'step': 15596, 'epoch': 3} {'type': 'loss', 'content': 0.07312367856502533, 'timestamp': '2025-09-30 22:31:27.326779', 'step': 15597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.359274', 'step': 15597, 'epoch': 3} {'type': 'loss', 'content': 0.0799432098865509, 'timestamp': '2025-09-30 22:31:27.362557', 'step': 15598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:27.394332', 'step': 15598, 'epoch': 3} {'type': 'loss', 'content': 0.10419339686632156, 'timestamp': '2025-09-30 22:31:27.398785', 'step': 15599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:27.432791', 'step': 15599, 'epoch': 3} {'type': 'loss', 'content': 0.05326404049992561, 'timestamp': '2025-09-30 22:31:27.467834', 'step': 15600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.499314', 'step': 15600, 'epoch': 3} {'type': 'loss', 'content': 0.056572478264570236, 'timestamp': '2025-09-30 22:31:27.503722', 'step': 15601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:27.546178', 'step': 15601, 'epoch': 3} {'type': 'loss', 'content': 0.09319672733545303, 'timestamp': '2025-09-30 22:31:27.557436', 'step': 15602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.599503', 'step': 15602, 'epoch': 3} {'type': 'loss', 'content': 0.07857978343963623, 'timestamp': '2025-09-30 22:31:27.602702', 'step': 15603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.636652', 'step': 15603, 'epoch': 3} {'type': 'loss', 'content': 0.0640919953584671, 'timestamp': '2025-09-30 22:31:27.663448', 'step': 15604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.693378', 'step': 15604, 'epoch': 3} {'type': 'loss', 'content': 0.0843156948685646, 'timestamp': '2025-09-30 22:31:27.706672', 'step': 15605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.740919', 'step': 15605, 'epoch': 3} {'type': 'loss', 'content': 0.0979006290435791, 'timestamp': '2025-09-30 22:31:27.744915', 'step': 15606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.775792', 'step': 15606, 'epoch': 3} {'type': 'loss', 'content': 0.03949529305100441, 'timestamp': '2025-09-30 22:31:27.792029', 'step': 15607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:27.825441', 'step': 15607, 'epoch': 3} {'type': 'loss', 'content': 0.08841271698474884, 'timestamp': '2025-09-30 22:31:27.850734', 'step': 15608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:27.893644', 'step': 15608, 'epoch': 3} {'type': 'loss', 'content': 0.09019316732883453, 'timestamp': '2025-09-30 22:31:27.897962', 'step': 15609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:27.947689', 'step': 15609, 'epoch': 3} {'type': 'loss', 'content': 0.09255713224411011, 'timestamp': '2025-09-30 22:31:27.952479', 'step': 15610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:27.986071', 'step': 15610, 'epoch': 3} {'type': 'loss', 'content': 0.07637487351894379, 'timestamp': '2025-09-30 22:31:27.992141', 'step': 15611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:28.031167', 'step': 15611, 'epoch': 3} {'type': 'loss', 'content': 0.10234026610851288, 'timestamp': '2025-09-30 22:31:28.056761', 'step': 15612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.095562', 'step': 15612, 'epoch': 3} {'type': 'loss', 'content': 0.09093645960092545, 'timestamp': '2025-09-30 22:31:28.099055', 'step': 15613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.130209', 'step': 15613, 'epoch': 3} {'type': 'loss', 'content': 0.19570708274841309, 'timestamp': '2025-09-30 22:31:28.133881', 'step': 15614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:28.174688', 'step': 15614, 'epoch': 3} {'type': 'loss', 'content': 0.11336369067430496, 'timestamp': '2025-09-30 22:31:28.190616', 'step': 15615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:28.228702', 'step': 15615, 'epoch': 3} {'type': 'loss', 'content': 0.07830670475959778, 'timestamp': '2025-09-30 22:31:28.254887', 'step': 15616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.288743', 'step': 15616, 'epoch': 3} {'type': 'loss', 'content': 0.11439055949449539, 'timestamp': '2025-09-30 22:31:28.294002', 'step': 15617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:28.325676', 'step': 15617, 'epoch': 3} {'type': 'loss', 'content': 0.06947226077318192, 'timestamp': '2025-09-30 22:31:28.330875', 'step': 15618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.364667', 'step': 15618, 'epoch': 3} {'type': 'loss', 'content': 0.0695779100060463, 'timestamp': '2025-09-30 22:31:28.369433', 'step': 15619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.402069', 'step': 15619, 'epoch': 3} {'type': 'loss', 'content': 0.053273752331733704, 'timestamp': '2025-09-30 22:31:28.427423', 'step': 15620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:28.462299', 'step': 15620, 'epoch': 3} {'type': 'loss', 'content': 0.07825881987810135, 'timestamp': '2025-09-30 22:31:28.469898', 'step': 15621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:28.502498', 'step': 15621, 'epoch': 3} {'type': 'loss', 'content': 0.02463097870349884, 'timestamp': '2025-09-30 22:31:28.506309', 'step': 15622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.547844', 'step': 15622, 'epoch': 3} {'type': 'loss', 'content': 0.06544023007154465, 'timestamp': '2025-09-30 22:31:28.551157', 'step': 15623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:28.585074', 'step': 15623, 'epoch': 3} {'type': 'loss', 'content': 0.18453700840473175, 'timestamp': '2025-09-30 22:31:28.624889', 'step': 15624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:28.660667', 'step': 15624, 'epoch': 3} {'type': 'loss', 'content': 0.0839679092168808, 'timestamp': '2025-09-30 22:31:28.666323', 'step': 15625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.708032', 'step': 15625, 'epoch': 3} {'type': 'loss', 'content': 0.06708173453807831, 'timestamp': '2025-09-30 22:31:28.712314', 'step': 15626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:28.744209', 'step': 15626, 'epoch': 3} {'type': 'loss', 'content': 0.13467802107334137, 'timestamp': '2025-09-30 22:31:28.749283', 'step': 15627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.781412', 'step': 15627, 'epoch': 3} {'type': 'loss', 'content': 0.04628952592611313, 'timestamp': '2025-09-30 22:31:28.805809', 'step': 15628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:28.839964', 'step': 15628, 'epoch': 3} {'type': 'loss', 'content': 0.03164515644311905, 'timestamp': '2025-09-30 22:31:28.844995', 'step': 15629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.880051', 'step': 15629, 'epoch': 3} {'type': 'loss', 'content': 0.08133355528116226, 'timestamp': '2025-09-30 22:31:28.893563', 'step': 15630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:28.934873', 'step': 15630, 'epoch': 3} {'type': 'loss', 'content': 0.0840359628200531, 'timestamp': '2025-09-30 22:31:28.938107', 'step': 15631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:28.971174', 'step': 15631, 'epoch': 3} {'type': 'loss', 'content': 0.07596427202224731, 'timestamp': '2025-09-30 22:31:28.996934', 'step': 15632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:29.028869', 'step': 15632, 'epoch': 3} {'type': 'loss', 'content': 0.050074223428964615, 'timestamp': '2025-09-30 22:31:29.043671', 'step': 15633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.077915', 'step': 15633, 'epoch': 3} {'type': 'loss', 'content': 0.0921328216791153, 'timestamp': '2025-09-30 22:31:29.081729', 'step': 15634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.114170', 'step': 15634, 'epoch': 3} {'type': 'loss', 'content': 0.05819183215498924, 'timestamp': '2025-09-30 22:31:29.118846', 'step': 15635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:29.151860', 'step': 15635, 'epoch': 3} {'type': 'loss', 'content': 0.07294465601444244, 'timestamp': '2025-09-30 22:31:29.187718', 'step': 15636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:29.221539', 'step': 15636, 'epoch': 3} {'type': 'loss', 'content': 0.026039227843284607, 'timestamp': '2025-09-30 22:31:29.235881', 'step': 15637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.267903', 'step': 15637, 'epoch': 3} {'type': 'loss', 'content': 0.09824427217245102, 'timestamp': '2025-09-30 22:31:29.272504', 'step': 15638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.306628', 'step': 15638, 'epoch': 3} {'type': 'loss', 'content': 0.10902590304613113, 'timestamp': '2025-09-30 22:31:29.310858', 'step': 15639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.345115', 'step': 15639, 'epoch': 3} {'type': 'loss', 'content': 0.0294509120285511, 'timestamp': '2025-09-30 22:31:29.371692', 'step': 15640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.405464', 'step': 15640, 'epoch': 3} {'type': 'loss', 'content': 0.05256927013397217, 'timestamp': '2025-09-30 22:31:29.422427', 'step': 15641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.455177', 'step': 15641, 'epoch': 3} {'type': 'loss', 'content': 0.09128301590681076, 'timestamp': '2025-09-30 22:31:29.472381', 'step': 15642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.505449', 'step': 15642, 'epoch': 3} {'type': 'loss', 'content': 0.10210807621479034, 'timestamp': '2025-09-30 22:31:29.521796', 'step': 15643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:29.554334', 'step': 15643, 'epoch': 3} {'type': 'loss', 'content': 0.017455067485570908, 'timestamp': '2025-09-30 22:31:29.582039', 'step': 15644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.615110', 'step': 15644, 'epoch': 3} {'type': 'loss', 'content': 0.08236196637153625, 'timestamp': '2025-09-30 22:31:29.618176', 'step': 15645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.650795', 'step': 15645, 'epoch': 3} {'type': 'loss', 'content': 0.09477010369300842, 'timestamp': '2025-09-30 22:31:29.654894', 'step': 15646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.686877', 'step': 15646, 'epoch': 3} {'type': 'loss', 'content': 0.10004152357578278, 'timestamp': '2025-09-30 22:31:29.690794', 'step': 15647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:29.722038', 'step': 15647, 'epoch': 3} {'type': 'loss', 'content': 0.03872208297252655, 'timestamp': '2025-09-30 22:31:29.748009', 'step': 15648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.780565', 'step': 15648, 'epoch': 3} {'type': 'loss', 'content': 0.07801207154989243, 'timestamp': '2025-09-30 22:31:29.785105', 'step': 15649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:29.818308', 'step': 15649, 'epoch': 3} {'type': 'loss', 'content': 0.1112600788474083, 'timestamp': '2025-09-30 22:31:29.822292', 'step': 15650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.853512', 'step': 15650, 'epoch': 3} {'type': 'loss', 'content': 0.0712604895234108, 'timestamp': '2025-09-30 22:31:29.857041', 'step': 15651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:29.906373', 'step': 15651, 'epoch': 3} {'type': 'loss', 'content': 0.05977415293455124, 'timestamp': '2025-09-30 22:31:29.932639', 'step': 15652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:29.981682', 'step': 15652, 'epoch': 3} {'type': 'loss', 'content': 0.12070655822753906, 'timestamp': '2025-09-30 22:31:29.986955', 'step': 15653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:30.021646', 'step': 15653, 'epoch': 3} {'type': 'loss', 'content': 0.11991684883832932, 'timestamp': '2025-09-30 22:31:30.025643', 'step': 15654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:30.059746', 'step': 15654, 'epoch': 3} {'type': 'loss', 'content': 0.16348032653331757, 'timestamp': '2025-09-30 22:31:30.071586', 'step': 15655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:30.113266', 'step': 15655, 'epoch': 3} {'type': 'loss', 'content': 0.027035271748900414, 'timestamp': '2025-09-30 22:31:30.140117', 'step': 15656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:30.210280', 'step': 15656, 'epoch': 3} {'type': 'loss', 'content': 0.08329486846923828, 'timestamp': '2025-09-30 22:31:30.214381', 'step': 15657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:30.254930', 'step': 15657, 'epoch': 3} {'type': 'loss', 'content': 0.09758332371711731, 'timestamp': '2025-09-30 22:31:30.262029', 'step': 15658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:30.296414', 'step': 15658, 'epoch': 3} {'type': 'loss', 'content': 0.12016090005636215, 'timestamp': '2025-09-30 22:31:30.300239', 'step': 15659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:30.356365', 'step': 15659, 'epoch': 3} {'type': 'loss', 'content': 0.028533320873975754, 'timestamp': '2025-09-30 22:31:30.381635', 'step': 15660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:30.420529', 'step': 15660, 'epoch': 3} {'type': 'loss', 'content': 0.0991058498620987, 'timestamp': '2025-09-30 22:31:30.423790', 'step': 15661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:30.461010', 'step': 15661, 'epoch': 3} {'type': 'loss', 'content': 0.037679824978113174, 'timestamp': '2025-09-30 22:31:30.465615', 'step': 15662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:30.505806', 'step': 15662, 'epoch': 3} {'type': 'loss', 'content': 0.1666920781135559, 'timestamp': '2025-09-30 22:31:30.510413', 'step': 15663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:30.545008', 'step': 15663, 'epoch': 3} {'type': 'loss', 'content': 0.11666901409626007, 'timestamp': '2025-09-30 22:31:30.570731', 'step': 15664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:30.619030', 'step': 15664, 'epoch': 3} {'type': 'loss', 'content': 0.04813732951879501, 'timestamp': '2025-09-30 22:31:30.623915', 'step': 15665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:30.662782', 'step': 15665, 'epoch': 3} {'type': 'loss', 'content': 0.06714896857738495, 'timestamp': '2025-09-30 22:31:30.671399', 'step': 15666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:30.705402', 'step': 15666, 'epoch': 3} {'type': 'loss', 'content': 0.14702050387859344, 'timestamp': '2025-09-30 22:31:30.709435', 'step': 15667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:30.751297', 'step': 15667, 'epoch': 3} {'type': 'loss', 'content': 0.11651276051998138, 'timestamp': '2025-09-30 22:31:30.778559', 'step': 15668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:30.813623', 'step': 15668, 'epoch': 3} {'type': 'loss', 'content': 0.09893550723791122, 'timestamp': '2025-09-30 22:31:30.816923', 'step': 15669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:30.861370', 'step': 15669, 'epoch': 3} {'type': 'loss', 'content': 0.10236723721027374, 'timestamp': '2025-09-30 22:31:30.864633', 'step': 15670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:30.902116', 'step': 15670, 'epoch': 3} {'type': 'loss', 'content': 0.13339665532112122, 'timestamp': '2025-09-30 22:31:30.905889', 'step': 15671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:30.942258', 'step': 15671, 'epoch': 3} {'type': 'loss', 'content': 0.11843857914209366, 'timestamp': '2025-09-30 22:31:30.967320', 'step': 15672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:31.004646', 'step': 15672, 'epoch': 3} {'type': 'loss', 'content': 0.13474389910697937, 'timestamp': '2025-09-30 22:31:31.009278', 'step': 15673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:31.045571', 'step': 15673, 'epoch': 3} {'type': 'loss', 'content': 0.1362272948026657, 'timestamp': '2025-09-30 22:31:31.048994', 'step': 15674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:31.084397', 'step': 15674, 'epoch': 3} {'type': 'loss', 'content': 0.05907785892486572, 'timestamp': '2025-09-30 22:31:31.088248', 'step': 15675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:31.136668', 'step': 15675, 'epoch': 3} {'type': 'loss', 'content': 0.0647227019071579, 'timestamp': '2025-09-30 22:31:31.162417', 'step': 15676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:31.205290', 'step': 15676, 'epoch': 3} {'type': 'loss', 'content': 0.07418465614318848, 'timestamp': '2025-09-30 22:31:31.215910', 'step': 15677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.261983', 'step': 15677, 'epoch': 3} {'type': 'loss', 'content': 0.18073797225952148, 'timestamp': '2025-09-30 22:31:31.265416', 'step': 15678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:31.309166', 'step': 15678, 'epoch': 3} {'type': 'loss', 'content': 0.02444494143128395, 'timestamp': '2025-09-30 22:31:31.313228', 'step': 15679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.350402', 'step': 15679, 'epoch': 3} {'type': 'loss', 'content': 0.03790841996669769, 'timestamp': '2025-09-30 22:31:31.378133', 'step': 15680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.412869', 'step': 15680, 'epoch': 3} {'type': 'loss', 'content': 0.07594911009073257, 'timestamp': '2025-09-30 22:31:31.424415', 'step': 15681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.460157', 'step': 15681, 'epoch': 3} {'type': 'loss', 'content': 0.066959448158741, 'timestamp': '2025-09-30 22:31:31.465243', 'step': 15682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:31.497320', 'step': 15682, 'epoch': 3} {'type': 'loss', 'content': 0.057437118142843246, 'timestamp': '2025-09-30 22:31:31.501407', 'step': 15683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.547414', 'step': 15683, 'epoch': 3} {'type': 'loss', 'content': 0.03804761916399002, 'timestamp': '2025-09-30 22:31:31.572451', 'step': 15684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:31.604443', 'step': 15684, 'epoch': 3} {'type': 'loss', 'content': 0.13660015165805817, 'timestamp': '2025-09-30 22:31:31.608430', 'step': 15685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.641531', 'step': 15685, 'epoch': 3} {'type': 'loss', 'content': 0.12934091687202454, 'timestamp': '2025-09-30 22:31:31.645491', 'step': 15686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:31.702618', 'step': 15686, 'epoch': 3} {'type': 'loss', 'content': 0.03635645657777786, 'timestamp': '2025-09-30 22:31:31.707455', 'step': 15687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.742768', 'step': 15687, 'epoch': 3} {'type': 'loss', 'content': 0.11569194495677948, 'timestamp': '2025-09-30 22:31:31.767892', 'step': 15688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:31.800164', 'step': 15688, 'epoch': 3} {'type': 'loss', 'content': 0.09159860014915466, 'timestamp': '2025-09-30 22:31:31.804705', 'step': 15689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.838614', 'step': 15689, 'epoch': 3} {'type': 'loss', 'content': 0.06443792581558228, 'timestamp': '2025-09-30 22:31:31.851961', 'step': 15690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:31.887423', 'step': 15690, 'epoch': 3} {'type': 'loss', 'content': 0.08183499425649643, 'timestamp': '2025-09-30 22:31:31.892629', 'step': 15691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:31.924582', 'step': 15691, 'epoch': 3} {'type': 'loss', 'content': 0.01672508753836155, 'timestamp': '2025-09-30 22:31:31.960779', 'step': 15692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:31.995447', 'step': 15692, 'epoch': 3} {'type': 'loss', 'content': 0.08887586742639542, 'timestamp': '2025-09-30 22:31:32.007621', 'step': 15693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:32.048916', 'step': 15693, 'epoch': 3} {'type': 'loss', 'content': 0.08263788372278214, 'timestamp': '2025-09-30 22:31:32.052185', 'step': 15694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:32.083902', 'step': 15694, 'epoch': 3} {'type': 'loss', 'content': 0.06261138617992401, 'timestamp': '2025-09-30 22:31:32.088040', 'step': 15695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.122916', 'step': 15695, 'epoch': 3} {'type': 'loss', 'content': 0.052645906805992126, 'timestamp': '2025-09-30 22:31:32.163421', 'step': 15696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.197412', 'step': 15696, 'epoch': 3} {'type': 'loss', 'content': 0.16899876296520233, 'timestamp': '2025-09-30 22:31:32.202030', 'step': 15697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:32.235423', 'step': 15697, 'epoch': 3} {'type': 'loss', 'content': 0.013700258918106556, 'timestamp': '2025-09-30 22:31:32.240550', 'step': 15698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.275020', 'step': 15698, 'epoch': 3} {'type': 'loss', 'content': 0.10788837820291519, 'timestamp': '2025-09-30 22:31:32.279394', 'step': 15699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:32.312564', 'step': 15699, 'epoch': 3} {'type': 'loss', 'content': 0.12449856102466583, 'timestamp': '2025-09-30 22:31:32.339917', 'step': 15700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:32.381089', 'step': 15700, 'epoch': 3} {'type': 'loss', 'content': 0.05599742755293846, 'timestamp': '2025-09-30 22:31:32.383925', 'step': 15701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:32.430334', 'step': 15701, 'epoch': 3} {'type': 'loss', 'content': 0.07146516442298889, 'timestamp': '2025-09-30 22:31:32.435924', 'step': 15702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:32.466742', 'step': 15702, 'epoch': 3} {'type': 'loss', 'content': 0.07788386195898056, 'timestamp': '2025-09-30 22:31:32.480814', 'step': 15703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:32.512566', 'step': 15703, 'epoch': 3} {'type': 'loss', 'content': 0.07293011993169785, 'timestamp': '2025-09-30 22:31:32.539971', 'step': 15704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.583837', 'step': 15704, 'epoch': 3} {'type': 'loss', 'content': 0.08158203214406967, 'timestamp': '2025-09-30 22:31:32.598902', 'step': 15705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.631543', 'step': 15705, 'epoch': 3} {'type': 'loss', 'content': 0.1472446173429489, 'timestamp': '2025-09-30 22:31:32.647409', 'step': 15706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:32.688165', 'step': 15706, 'epoch': 3} {'type': 'loss', 'content': 0.0827169418334961, 'timestamp': '2025-09-30 22:31:32.694325', 'step': 15707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:32.727873', 'step': 15707, 'epoch': 3} {'type': 'loss', 'content': 0.03733745589852333, 'timestamp': '2025-09-30 22:31:32.762638', 'step': 15708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.796853', 'step': 15708, 'epoch': 3} {'type': 'loss', 'content': 0.03313849866390228, 'timestamp': '2025-09-30 22:31:32.803243', 'step': 15709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:32.836039', 'step': 15709, 'epoch': 3} {'type': 'loss', 'content': 0.1185789480805397, 'timestamp': '2025-09-30 22:31:32.840833', 'step': 15710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:32.876177', 'step': 15710, 'epoch': 3} {'type': 'loss', 'content': 0.09367144852876663, 'timestamp': '2025-09-30 22:31:32.882436', 'step': 15711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.916005', 'step': 15711, 'epoch': 3} {'type': 'loss', 'content': 0.07524880021810532, 'timestamp': '2025-09-30 22:31:32.952421', 'step': 15712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:32.984701', 'step': 15712, 'epoch': 3} {'type': 'loss', 'content': 0.07615800946950912, 'timestamp': '2025-09-30 22:31:32.989859', 'step': 15713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:33.022178', 'step': 15713, 'epoch': 3} {'type': 'loss', 'content': 0.08599543571472168, 'timestamp': '2025-09-30 22:31:33.035082', 'step': 15714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:33.067903', 'step': 15714, 'epoch': 3} {'type': 'loss', 'content': 0.0703255906701088, 'timestamp': '2025-09-30 22:31:33.078196', 'step': 15715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.116142', 'step': 15715, 'epoch': 3} {'type': 'loss', 'content': 0.06931738555431366, 'timestamp': '2025-09-30 22:31:33.142650', 'step': 15716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:33.175527', 'step': 15716, 'epoch': 3} {'type': 'loss', 'content': 0.04150550812482834, 'timestamp': '2025-09-30 22:31:33.179609', 'step': 15717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:33.219690', 'step': 15717, 'epoch': 3} {'type': 'loss', 'content': 0.03789181634783745, 'timestamp': '2025-09-30 22:31:33.235070', 'step': 15718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:33.266645', 'step': 15718, 'epoch': 3} {'type': 'loss', 'content': 0.051606059074401855, 'timestamp': '2025-09-30 22:31:33.271226', 'step': 15719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.303262', 'step': 15719, 'epoch': 3} {'type': 'loss', 'content': 0.05304493382573128, 'timestamp': '2025-09-30 22:31:33.338021', 'step': 15720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.374721', 'step': 15720, 'epoch': 3} {'type': 'loss', 'content': 0.0970098152756691, 'timestamp': '2025-09-30 22:31:33.378190', 'step': 15721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:33.412924', 'step': 15721, 'epoch': 3} {'type': 'loss', 'content': 0.06115048751235008, 'timestamp': '2025-09-30 22:31:33.416310', 'step': 15722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:33.455272', 'step': 15722, 'epoch': 3} {'type': 'loss', 'content': 0.15405598282814026, 'timestamp': '2025-09-30 22:31:33.459950', 'step': 15723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:33.492543', 'step': 15723, 'epoch': 3} {'type': 'loss', 'content': 0.03913426026701927, 'timestamp': '2025-09-30 22:31:33.517613', 'step': 15724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.562916', 'step': 15724, 'epoch': 3} {'type': 'loss', 'content': 0.013018124736845493, 'timestamp': '2025-09-30 22:31:33.567226', 'step': 15725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.602134', 'step': 15725, 'epoch': 3} {'type': 'loss', 'content': 0.0981508195400238, 'timestamp': '2025-09-30 22:31:33.605874', 'step': 15726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:33.637640', 'step': 15726, 'epoch': 3} {'type': 'loss', 'content': 0.07576271146535873, 'timestamp': '2025-09-30 22:31:33.648672', 'step': 15727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:33.694337', 'step': 15727, 'epoch': 3} {'type': 'loss', 'content': 0.09405919909477234, 'timestamp': '2025-09-30 22:31:33.729011', 'step': 15728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.770536', 'step': 15728, 'epoch': 3} {'type': 'loss', 'content': 0.07894832640886307, 'timestamp': '2025-09-30 22:31:33.776900', 'step': 15729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.813162', 'step': 15729, 'epoch': 3} {'type': 'loss', 'content': 0.07384159415960312, 'timestamp': '2025-09-30 22:31:33.821266', 'step': 15730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:33.859984', 'step': 15730, 'epoch': 3} {'type': 'loss', 'content': 0.047223880887031555, 'timestamp': '2025-09-30 22:31:33.869532', 'step': 15731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:33.904310', 'step': 15731, 'epoch': 3} {'type': 'loss', 'content': 0.1383550465106964, 'timestamp': '2025-09-30 22:31:33.934328', 'step': 15732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:33.980157', 'step': 15732, 'epoch': 3} {'type': 'loss', 'content': 0.024223390966653824, 'timestamp': '2025-09-30 22:31:33.983792', 'step': 15733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:34.015395', 'step': 15733, 'epoch': 3} {'type': 'loss', 'content': 0.06043066084384918, 'timestamp': '2025-09-30 22:31:34.020062', 'step': 15734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:34.053494', 'step': 15734, 'epoch': 3} {'type': 'loss', 'content': 0.07799077033996582, 'timestamp': '2025-09-30 22:31:34.057771', 'step': 15735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:34.089168', 'step': 15735, 'epoch': 3} {'type': 'loss', 'content': 0.03039710223674774, 'timestamp': '2025-09-30 22:31:34.125488', 'step': 15736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:34.157493', 'step': 15736, 'epoch': 3} {'type': 'loss', 'content': 0.12218282371759415, 'timestamp': '2025-09-30 22:31:34.161226', 'step': 15737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.193976', 'step': 15737, 'epoch': 3} {'type': 'loss', 'content': 0.09265001863241196, 'timestamp': '2025-09-30 22:31:34.198043', 'step': 15738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:34.230631', 'step': 15738, 'epoch': 3} {'type': 'loss', 'content': 0.11431100219488144, 'timestamp': '2025-09-30 22:31:34.237219', 'step': 15739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:34.269817', 'step': 15739, 'epoch': 3} {'type': 'loss', 'content': 0.06970685720443726, 'timestamp': '2025-09-30 22:31:34.308729', 'step': 15740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:34.344459', 'step': 15740, 'epoch': 3} {'type': 'loss', 'content': 0.07443591952323914, 'timestamp': '2025-09-30 22:31:34.347906', 'step': 15741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:34.384435', 'step': 15741, 'epoch': 3} {'type': 'loss', 'content': 0.040715862065553665, 'timestamp': '2025-09-30 22:31:34.388055', 'step': 15742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.422318', 'step': 15742, 'epoch': 3} {'type': 'loss', 'content': 0.056498877704143524, 'timestamp': '2025-09-30 22:31:34.426841', 'step': 15743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.469931', 'step': 15743, 'epoch': 3} {'type': 'loss', 'content': 0.05941180884838104, 'timestamp': '2025-09-30 22:31:34.506858', 'step': 15744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:34.539763', 'step': 15744, 'epoch': 3} {'type': 'loss', 'content': 0.08884775638580322, 'timestamp': '2025-09-30 22:31:34.545130', 'step': 15745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:34.576468', 'step': 15745, 'epoch': 3} {'type': 'loss', 'content': 0.05462993308901787, 'timestamp': '2025-09-30 22:31:34.581399', 'step': 15746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.614854', 'step': 15746, 'epoch': 3} {'type': 'loss', 'content': 0.11559925228357315, 'timestamp': '2025-09-30 22:31:34.619627', 'step': 15747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:34.651758', 'step': 15747, 'epoch': 3} {'type': 'loss', 'content': 0.08784732967615128, 'timestamp': '2025-09-30 22:31:34.688740', 'step': 15748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:34.722786', 'step': 15748, 'epoch': 3} {'type': 'loss', 'content': 0.07231529802083969, 'timestamp': '2025-09-30 22:31:34.725914', 'step': 15749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:34.762156', 'step': 15749, 'epoch': 3} {'type': 'loss', 'content': 0.05013183504343033, 'timestamp': '2025-09-30 22:31:34.764895', 'step': 15750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.797173', 'step': 15750, 'epoch': 3} {'type': 'loss', 'content': 0.0705040842294693, 'timestamp': '2025-09-30 22:31:34.799534', 'step': 15751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.830555', 'step': 15751, 'epoch': 3} {'type': 'loss', 'content': 0.1036827340722084, 'timestamp': '2025-09-30 22:31:34.854559', 'step': 15752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:34.891363', 'step': 15752, 'epoch': 3} {'type': 'loss', 'content': 0.03767882287502289, 'timestamp': '2025-09-30 22:31:34.894401', 'step': 15753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:34.926494', 'step': 15753, 'epoch': 3} {'type': 'loss', 'content': 0.0647088885307312, 'timestamp': '2025-09-30 22:31:34.929760', 'step': 15754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:34.966343', 'step': 15754, 'epoch': 3} {'type': 'loss', 'content': 0.04286160692572594, 'timestamp': '2025-09-30 22:31:34.971187', 'step': 15755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:35.011777', 'step': 15755, 'epoch': 3} {'type': 'loss', 'content': 0.025877518579363823, 'timestamp': '2025-09-30 22:31:35.048195', 'step': 15756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.079013', 'step': 15756, 'epoch': 3} {'type': 'loss', 'content': 0.10357431322336197, 'timestamp': '2025-09-30 22:31:35.086946', 'step': 15757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:35.120342', 'step': 15757, 'epoch': 3} {'type': 'loss', 'content': 0.06797324866056442, 'timestamp': '2025-09-30 22:31:35.124752', 'step': 15758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:35.157459', 'step': 15758, 'epoch': 3} {'type': 'loss', 'content': 0.16997863352298737, 'timestamp': '2025-09-30 22:31:35.159839', 'step': 15759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.190572', 'step': 15759, 'epoch': 3} {'type': 'loss', 'content': 0.09808328002691269, 'timestamp': '2025-09-30 22:31:35.217364', 'step': 15760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:35.247767', 'step': 15760, 'epoch': 3} {'type': 'loss', 'content': 0.19167611002922058, 'timestamp': '2025-09-30 22:31:35.252644', 'step': 15761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.285603', 'step': 15761, 'epoch': 3} {'type': 'loss', 'content': 0.017225950956344604, 'timestamp': '2025-09-30 22:31:35.290920', 'step': 15762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.325297', 'step': 15762, 'epoch': 3} {'type': 'loss', 'content': 0.1329643577337265, 'timestamp': '2025-09-30 22:31:35.328507', 'step': 15763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.358485', 'step': 15763, 'epoch': 3} {'type': 'loss', 'content': 0.08726520091295242, 'timestamp': '2025-09-30 22:31:35.387874', 'step': 15764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:35.421269', 'step': 15764, 'epoch': 3} {'type': 'loss', 'content': 0.08414024114608765, 'timestamp': '2025-09-30 22:31:35.425772', 'step': 15765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:35.457404', 'step': 15765, 'epoch': 3} {'type': 'loss', 'content': 0.08924483507871628, 'timestamp': '2025-09-30 22:31:35.461413', 'step': 15766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.493589', 'step': 15766, 'epoch': 3} {'type': 'loss', 'content': 0.04274146258831024, 'timestamp': '2025-09-30 22:31:35.499716', 'step': 15767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:35.534895', 'step': 15767, 'epoch': 3} {'type': 'loss', 'content': 0.098122738301754, 'timestamp': '2025-09-30 22:31:35.560030', 'step': 15768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.593852', 'step': 15768, 'epoch': 3} {'type': 'loss', 'content': 0.08293543756008148, 'timestamp': '2025-09-30 22:31:35.596409', 'step': 15769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:35.631265', 'step': 15769, 'epoch': 3} {'type': 'loss', 'content': 0.08507052809000015, 'timestamp': '2025-09-30 22:31:35.635448', 'step': 15770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:35.667818', 'step': 15770, 'epoch': 3} {'type': 'loss', 'content': 0.08626975864171982, 'timestamp': '2025-09-30 22:31:35.672206', 'step': 15771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.702656', 'step': 15771, 'epoch': 3} {'type': 'loss', 'content': 0.06513439863920212, 'timestamp': '2025-09-30 22:31:35.730160', 'step': 15772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.761149', 'step': 15772, 'epoch': 3} {'type': 'loss', 'content': 0.0036431162152439356, 'timestamp': '2025-09-30 22:31:35.774191', 'step': 15773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:35.816461', 'step': 15773, 'epoch': 3} {'type': 'loss', 'content': 0.1064087375998497, 'timestamp': '2025-09-30 22:31:35.819449', 'step': 15774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:35.851509', 'step': 15774, 'epoch': 3} {'type': 'loss', 'content': 0.05580812692642212, 'timestamp': '2025-09-30 22:31:35.854249', 'step': 15775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.889333', 'step': 15775, 'epoch': 3} {'type': 'loss', 'content': 0.10491503775119781, 'timestamp': '2025-09-30 22:31:35.914163', 'step': 15776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:35.951258', 'step': 15776, 'epoch': 3} {'type': 'loss', 'content': 0.05776949226856232, 'timestamp': '2025-09-30 22:31:35.955598', 'step': 15777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:35.999169', 'step': 15777, 'epoch': 3} {'type': 'loss', 'content': 0.16326726973056793, 'timestamp': '2025-09-30 22:31:36.004353', 'step': 15778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.036571', 'step': 15778, 'epoch': 3} {'type': 'loss', 'content': 0.014360759407281876, 'timestamp': '2025-09-30 22:31:36.041816', 'step': 15779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:36.073945', 'step': 15779, 'epoch': 3} {'type': 'loss', 'content': 0.05617275834083557, 'timestamp': '2025-09-30 22:31:36.098585', 'step': 15780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:36.129510', 'step': 15780, 'epoch': 3} {'type': 'loss', 'content': 0.04608071222901344, 'timestamp': '2025-09-30 22:31:36.134933', 'step': 15781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.169169', 'step': 15781, 'epoch': 3} {'type': 'loss', 'content': 0.06183815002441406, 'timestamp': '2025-09-30 22:31:36.171579', 'step': 15782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:36.205904', 'step': 15782, 'epoch': 3} {'type': 'loss', 'content': 0.18425297737121582, 'timestamp': '2025-09-30 22:31:36.212030', 'step': 15783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:36.246269', 'step': 15783, 'epoch': 3} {'type': 'loss', 'content': 0.0827271118760109, 'timestamp': '2025-09-30 22:31:36.271575', 'step': 15784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.305277', 'step': 15784, 'epoch': 3} {'type': 'loss', 'content': 0.10030607879161835, 'timestamp': '2025-09-30 22:31:36.310667', 'step': 15785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.343254', 'step': 15785, 'epoch': 3} {'type': 'loss', 'content': 0.09723474085330963, 'timestamp': '2025-09-30 22:31:36.348125', 'step': 15786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:36.380554', 'step': 15786, 'epoch': 3} {'type': 'loss', 'content': 0.04670122638344765, 'timestamp': '2025-09-30 22:31:36.383469', 'step': 15787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.418431', 'step': 15787, 'epoch': 3} {'type': 'loss', 'content': 0.1476946771144867, 'timestamp': '2025-09-30 22:31:36.444413', 'step': 15788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:36.486673', 'step': 15788, 'epoch': 3} {'type': 'loss', 'content': 0.07786780595779419, 'timestamp': '2025-09-30 22:31:36.489639', 'step': 15789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.534759', 'step': 15789, 'epoch': 3} {'type': 'loss', 'content': 0.0938401073217392, 'timestamp': '2025-09-30 22:31:36.538315', 'step': 15790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.570829', 'step': 15790, 'epoch': 3} {'type': 'loss', 'content': 0.061000384390354156, 'timestamp': '2025-09-30 22:31:36.574747', 'step': 15791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:36.608172', 'step': 15791, 'epoch': 3} {'type': 'loss', 'content': 0.09445293992757797, 'timestamp': '2025-09-30 22:31:36.635354', 'step': 15792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:36.670538', 'step': 15792, 'epoch': 3} {'type': 'loss', 'content': 0.05598611384630203, 'timestamp': '2025-09-30 22:31:36.678447', 'step': 15793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:36.721378', 'step': 15793, 'epoch': 3} {'type': 'loss', 'content': 0.12256404012441635, 'timestamp': '2025-09-30 22:31:36.725072', 'step': 15794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.768041', 'step': 15794, 'epoch': 3} {'type': 'loss', 'content': 0.06401442736387253, 'timestamp': '2025-09-30 22:31:36.772794', 'step': 15795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:36.807141', 'step': 15795, 'epoch': 3} {'type': 'loss', 'content': 0.0813438817858696, 'timestamp': '2025-09-30 22:31:36.831181', 'step': 15796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:36.874686', 'step': 15796, 'epoch': 3} {'type': 'loss', 'content': 0.0995284914970398, 'timestamp': '2025-09-30 22:31:36.879443', 'step': 15797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:36.917547', 'step': 15797, 'epoch': 3} {'type': 'loss', 'content': 0.0785137340426445, 'timestamp': '2025-09-30 22:31:36.921312', 'step': 15798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:36.958615', 'step': 15798, 'epoch': 3} {'type': 'loss', 'content': 0.07129332423210144, 'timestamp': '2025-09-30 22:31:36.961193', 'step': 15799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:36.997567', 'step': 15799, 'epoch': 3} {'type': 'loss', 'content': 0.04988136515021324, 'timestamp': '2025-09-30 22:31:37.023313', 'step': 15800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.071779', 'step': 15800, 'epoch': 3} {'type': 'loss', 'content': 0.033709120005369186, 'timestamp': '2025-09-30 22:31:37.075869', 'step': 15801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.115328', 'step': 15801, 'epoch': 3} {'type': 'loss', 'content': 0.09088671952486038, 'timestamp': '2025-09-30 22:31:37.127107', 'step': 15802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.169892', 'step': 15802, 'epoch': 3} {'type': 'loss', 'content': 0.06615656614303589, 'timestamp': '2025-09-30 22:31:37.174057', 'step': 15803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.220009', 'step': 15803, 'epoch': 3} {'type': 'loss', 'content': 0.08687705546617508, 'timestamp': '2025-09-30 22:31:37.244426', 'step': 15804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:37.278374', 'step': 15804, 'epoch': 3} {'type': 'loss', 'content': 0.07276392728090286, 'timestamp': '2025-09-30 22:31:37.282027', 'step': 15805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:37.324010', 'step': 15805, 'epoch': 3} {'type': 'loss', 'content': 0.24260132014751434, 'timestamp': '2025-09-30 22:31:37.327789', 'step': 15806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:37.359877', 'step': 15806, 'epoch': 3} {'type': 'loss', 'content': 0.10455366224050522, 'timestamp': '2025-09-30 22:31:37.378610', 'step': 15807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:37.420321', 'step': 15807, 'epoch': 3} {'type': 'loss', 'content': 0.1539301574230194, 'timestamp': '2025-09-30 22:31:37.447770', 'step': 15808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:37.480052', 'step': 15808, 'epoch': 3} {'type': 'loss', 'content': 0.15646782517433167, 'timestamp': '2025-09-30 22:31:37.494304', 'step': 15809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:37.526951', 'step': 15809, 'epoch': 3} {'type': 'loss', 'content': 0.022420180961489677, 'timestamp': '2025-09-30 22:31:37.530754', 'step': 15810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:37.562809', 'step': 15810, 'epoch': 3} {'type': 'loss', 'content': 0.07123083621263504, 'timestamp': '2025-09-30 22:31:37.567945', 'step': 15811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:37.601100', 'step': 15811, 'epoch': 3} {'type': 'loss', 'content': 0.07889021188020706, 'timestamp': '2025-09-30 22:31:37.626893', 'step': 15812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.661329', 'step': 15812, 'epoch': 3} {'type': 'loss', 'content': 0.09611304849386215, 'timestamp': '2025-09-30 22:31:37.669943', 'step': 15813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.704538', 'step': 15813, 'epoch': 3} {'type': 'loss', 'content': 0.10675850510597229, 'timestamp': '2025-09-30 22:31:37.707696', 'step': 15814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:37.738737', 'step': 15814, 'epoch': 3} {'type': 'loss', 'content': 0.051959794014692307, 'timestamp': '2025-09-30 22:31:37.743717', 'step': 15815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:37.776490', 'step': 15815, 'epoch': 3} {'type': 'loss', 'content': 0.06040966510772705, 'timestamp': '2025-09-30 22:31:37.800857', 'step': 15816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:37.831531', 'step': 15816, 'epoch': 3} {'type': 'loss', 'content': 0.0802270695567131, 'timestamp': '2025-09-30 22:31:37.852062', 'step': 15817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.886490', 'step': 15817, 'epoch': 3} {'type': 'loss', 'content': 0.062041040509939194, 'timestamp': '2025-09-30 22:31:37.891806', 'step': 15818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:37.924547', 'step': 15818, 'epoch': 3} {'type': 'loss', 'content': 0.11959187686443329, 'timestamp': '2025-09-30 22:31:37.927131', 'step': 15819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:37.957917', 'step': 15819, 'epoch': 3} {'type': 'loss', 'content': 0.14087915420532227, 'timestamp': '2025-09-30 22:31:37.982327', 'step': 15820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:38.013351', 'step': 15820, 'epoch': 3} {'type': 'loss', 'content': 0.04627995565533638, 'timestamp': '2025-09-30 22:31:38.018578', 'step': 15821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:38.051720', 'step': 15821, 'epoch': 3} {'type': 'loss', 'content': 0.05432199314236641, 'timestamp': '2025-09-30 22:31:38.054366', 'step': 15822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:38.086344', 'step': 15822, 'epoch': 3} {'type': 'loss', 'content': 0.05047442391514778, 'timestamp': '2025-09-30 22:31:38.089371', 'step': 15823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:38.123213', 'step': 15823, 'epoch': 3} {'type': 'loss', 'content': 0.06065352261066437, 'timestamp': '2025-09-30 22:31:38.149121', 'step': 15824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:38.194614', 'step': 15824, 'epoch': 3} {'type': 'loss', 'content': 0.11008085310459137, 'timestamp': '2025-09-30 22:31:38.199067', 'step': 15825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:38.231407', 'step': 15825, 'epoch': 3} {'type': 'loss', 'content': 0.09940274804830551, 'timestamp': '2025-09-30 22:31:38.235855', 'step': 15826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.268644', 'step': 15826, 'epoch': 3} {'type': 'loss', 'content': 0.08390863984823227, 'timestamp': '2025-09-30 22:31:38.274100', 'step': 15827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:38.311463', 'step': 15827, 'epoch': 3} {'type': 'loss', 'content': 0.06169609725475311, 'timestamp': '2025-09-30 22:31:38.336240', 'step': 15828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:38.374486', 'step': 15828, 'epoch': 3} {'type': 'loss', 'content': 0.12102010846138, 'timestamp': '2025-09-30 22:31:38.378771', 'step': 15829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.410272', 'step': 15829, 'epoch': 3} {'type': 'loss', 'content': 0.10982250422239304, 'timestamp': '2025-09-30 22:31:38.416801', 'step': 15830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:38.450962', 'step': 15830, 'epoch': 3} {'type': 'loss', 'content': 0.13008171319961548, 'timestamp': '2025-09-30 22:31:38.454672', 'step': 15831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:38.487121', 'step': 15831, 'epoch': 3} {'type': 'loss', 'content': 0.05811922624707222, 'timestamp': '2025-09-30 22:31:38.525230', 'step': 15832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.557414', 'step': 15832, 'epoch': 3} {'type': 'loss', 'content': 0.11259810626506805, 'timestamp': '2025-09-30 22:31:38.562240', 'step': 15833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:38.594984', 'step': 15833, 'epoch': 3} {'type': 'loss', 'content': 0.03979472443461418, 'timestamp': '2025-09-30 22:31:38.599790', 'step': 15834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:31:38.638809', 'step': 15834, 'epoch': 3} {'type': 'loss', 'content': 0.2138645350933075, 'timestamp': '2025-09-30 22:31:38.646041', 'step': 15835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:38.690572', 'step': 15835, 'epoch': 3} {'type': 'loss', 'content': 0.07768647372722626, 'timestamp': '2025-09-30 22:31:38.715609', 'step': 15836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:38.747885', 'step': 15836, 'epoch': 3} {'type': 'loss', 'content': 0.09291840344667435, 'timestamp': '2025-09-30 22:31:38.767482', 'step': 15837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.800270', 'step': 15837, 'epoch': 3} {'type': 'loss', 'content': 0.07781773060560226, 'timestamp': '2025-09-30 22:31:38.804920', 'step': 15838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.843428', 'step': 15838, 'epoch': 3} {'type': 'loss', 'content': 0.10817719995975494, 'timestamp': '2025-09-30 22:31:38.847957', 'step': 15839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.879429', 'step': 15839, 'epoch': 3} {'type': 'loss', 'content': 0.05742529779672623, 'timestamp': '2025-09-30 22:31:38.903037', 'step': 15840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.939019', 'step': 15840, 'epoch': 3} {'type': 'loss', 'content': 0.12868137657642365, 'timestamp': '2025-09-30 22:31:38.943251', 'step': 15841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:38.978841', 'step': 15841, 'epoch': 3} {'type': 'loss', 'content': 0.06312988698482513, 'timestamp': '2025-09-30 22:31:38.982530', 'step': 15842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:39.021706', 'step': 15842, 'epoch': 3} {'type': 'loss', 'content': 0.09472548961639404, 'timestamp': '2025-09-30 22:31:39.024597', 'step': 15843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:39.055622', 'step': 15843, 'epoch': 3} {'type': 'loss', 'content': 0.14134962856769562, 'timestamp': '2025-09-30 22:31:39.083647', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:31:47.298664', 'step': 15844, 'epoch': 3} {'type': 'pplx', 'content': 12867.622315853305, 'timestamp': '2025-09-30 22:31:47.313955', 'step': 15844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:47.345598', 'step': 15844, 'epoch': 3} {'type': 'loss', 'content': 0.05335848405957222, 'timestamp': '2025-09-30 22:31:47.356863', 'step': 15845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:47.390075', 'step': 15845, 'epoch': 3} {'type': 'loss', 'content': 0.17649348080158234, 'timestamp': '2025-09-30 22:31:47.399481', 'step': 15846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:47.434736', 'step': 15846, 'epoch': 3} {'type': 'loss', 'content': 0.13927499949932098, 'timestamp': '2025-09-30 22:31:47.445279', 'step': 15847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:47.476616', 'step': 15847, 'epoch': 3} {'type': 'loss', 'content': 0.05430921912193298, 'timestamp': '2025-09-30 22:31:47.502104', 'step': 15848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:47.533648', 'step': 15848, 'epoch': 3} {'type': 'loss', 'content': 0.08294877409934998, 'timestamp': '2025-09-30 22:31:47.553120', 'step': 15849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:47.585397', 'step': 15849, 'epoch': 3} {'type': 'loss', 'content': 0.0548962838947773, 'timestamp': '2025-09-30 22:31:47.589897', 'step': 15850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:47.622137', 'step': 15850, 'epoch': 3} {'type': 'loss', 'content': 0.07083439081907272, 'timestamp': '2025-09-30 22:31:47.640826', 'step': 15851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:47.684256', 'step': 15851, 'epoch': 3} {'type': 'loss', 'content': 0.07937690615653992, 'timestamp': '2025-09-30 22:31:47.710745', 'step': 15852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:47.743498', 'step': 15852, 'epoch': 3} {'type': 'loss', 'content': 0.04941172152757645, 'timestamp': '2025-09-30 22:31:47.758823', 'step': 15853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:47.793084', 'step': 15853, 'epoch': 3} {'type': 'loss', 'content': 0.024644142016768456, 'timestamp': '2025-09-30 22:31:47.801996', 'step': 15854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:47.841947', 'step': 15854, 'epoch': 3} {'type': 'loss', 'content': 0.0834452286362648, 'timestamp': '2025-09-30 22:31:47.855998', 'step': 15855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:47.888302', 'step': 15855, 'epoch': 3} {'type': 'loss', 'content': 0.0524420365691185, 'timestamp': '2025-09-30 22:31:47.913351', 'step': 15856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:47.947187', 'step': 15856, 'epoch': 3} {'type': 'loss', 'content': 0.06169744208455086, 'timestamp': '2025-09-30 22:31:47.951056', 'step': 15857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:47.984518', 'step': 15857, 'epoch': 3} {'type': 'loss', 'content': 0.06217888370156288, 'timestamp': '2025-09-30 22:31:47.990851', 'step': 15858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:48.037790', 'step': 15858, 'epoch': 3} {'type': 'loss', 'content': 0.06551198661327362, 'timestamp': '2025-09-30 22:31:48.041985', 'step': 15859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.081368', 'step': 15859, 'epoch': 3} {'type': 'loss', 'content': 0.07941669970750809, 'timestamp': '2025-09-30 22:31:48.106880', 'step': 15860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.145131', 'step': 15860, 'epoch': 3} {'type': 'loss', 'content': 0.06608260422945023, 'timestamp': '2025-09-30 22:31:48.148833', 'step': 15861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.180669', 'step': 15861, 'epoch': 3} {'type': 'loss', 'content': 0.03304309770464897, 'timestamp': '2025-09-30 22:31:48.186022', 'step': 15862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:48.229837', 'step': 15862, 'epoch': 3} {'type': 'loss', 'content': 0.10835587233304977, 'timestamp': '2025-09-30 22:31:48.232802', 'step': 15863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.265277', 'step': 15863, 'epoch': 3} {'type': 'loss', 'content': 0.02036357671022415, 'timestamp': '2025-09-30 22:31:48.292189', 'step': 15864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:48.325484', 'step': 15864, 'epoch': 3} {'type': 'loss', 'content': 0.05670022964477539, 'timestamp': '2025-09-30 22:31:48.339931', 'step': 15865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:48.373635', 'step': 15865, 'epoch': 3} {'type': 'loss', 'content': 0.047004129737615585, 'timestamp': '2025-09-30 22:31:48.378167', 'step': 15866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.410168', 'step': 15866, 'epoch': 3} {'type': 'loss', 'content': 0.08319155126810074, 'timestamp': '2025-09-30 22:31:48.415810', 'step': 15867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:48.462327', 'step': 15867, 'epoch': 3} {'type': 'loss', 'content': 0.09223224222660065, 'timestamp': '2025-09-30 22:31:48.488630', 'step': 15868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:48.543174', 'step': 15868, 'epoch': 3} {'type': 'loss', 'content': 0.10795392841100693, 'timestamp': '2025-09-30 22:31:48.559567', 'step': 15869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.592601', 'step': 15869, 'epoch': 3} {'type': 'loss', 'content': 0.11530780792236328, 'timestamp': '2025-09-30 22:31:48.607289', 'step': 15870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:48.648443', 'step': 15870, 'epoch': 3} {'type': 'loss', 'content': 0.0608094222843647, 'timestamp': '2025-09-30 22:31:48.651142', 'step': 15871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:48.691328', 'step': 15871, 'epoch': 3} {'type': 'loss', 'content': 0.09085196256637573, 'timestamp': '2025-09-30 22:31:48.717150', 'step': 15872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.750713', 'step': 15872, 'epoch': 3} {'type': 'loss', 'content': 0.11317124962806702, 'timestamp': '2025-09-30 22:31:48.754784', 'step': 15873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:48.802228', 'step': 15873, 'epoch': 3} {'type': 'loss', 'content': 0.025331787765026093, 'timestamp': '2025-09-30 22:31:48.808848', 'step': 15874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.839742', 'step': 15874, 'epoch': 3} {'type': 'loss', 'content': 0.10284562408924103, 'timestamp': '2025-09-30 22:31:48.855618', 'step': 15875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.888646', 'step': 15875, 'epoch': 3} {'type': 'loss', 'content': 0.06787704676389694, 'timestamp': '2025-09-30 22:31:48.914076', 'step': 15876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:48.948562', 'step': 15876, 'epoch': 3} {'type': 'loss', 'content': 0.08590235561132431, 'timestamp': '2025-09-30 22:31:48.954146', 'step': 15877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:48.988202', 'step': 15877, 'epoch': 3} {'type': 'loss', 'content': 0.07334697246551514, 'timestamp': '2025-09-30 22:31:49.006890', 'step': 15878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.056029', 'step': 15878, 'epoch': 3} {'type': 'loss', 'content': 0.1517079770565033, 'timestamp': '2025-09-30 22:31:49.060872', 'step': 15879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.094011', 'step': 15879, 'epoch': 3} {'type': 'loss', 'content': 0.059987373650074005, 'timestamp': '2025-09-30 22:31:49.121523', 'step': 15880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.153998', 'step': 15880, 'epoch': 3} {'type': 'loss', 'content': 0.10130973905324936, 'timestamp': '2025-09-30 22:31:49.157124', 'step': 15881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:49.190270', 'step': 15881, 'epoch': 3} {'type': 'loss', 'content': 0.05588621273636818, 'timestamp': '2025-09-30 22:31:49.194478', 'step': 15882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.235823', 'step': 15882, 'epoch': 3} {'type': 'loss', 'content': 0.10757031291723251, 'timestamp': '2025-09-30 22:31:49.240292', 'step': 15883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.274190', 'step': 15883, 'epoch': 3} {'type': 'loss', 'content': 0.0718463733792305, 'timestamp': '2025-09-30 22:31:49.299931', 'step': 15884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:49.333701', 'step': 15884, 'epoch': 3} {'type': 'loss', 'content': 0.076964370906353, 'timestamp': '2025-09-30 22:31:49.337829', 'step': 15885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.370848', 'step': 15885, 'epoch': 3} {'type': 'loss', 'content': 0.04534241929650307, 'timestamp': '2025-09-30 22:31:49.375490', 'step': 15886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:49.416297', 'step': 15886, 'epoch': 3} {'type': 'loss', 'content': 0.06505726277828217, 'timestamp': '2025-09-30 22:31:49.422042', 'step': 15887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:49.456527', 'step': 15887, 'epoch': 3} {'type': 'loss', 'content': 0.09768973290920258, 'timestamp': '2025-09-30 22:31:49.481417', 'step': 15888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.512395', 'step': 15888, 'epoch': 3} {'type': 'loss', 'content': 0.06931575387716293, 'timestamp': '2025-09-30 22:31:49.515607', 'step': 15889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:49.552173', 'step': 15889, 'epoch': 3} {'type': 'loss', 'content': 0.10366791486740112, 'timestamp': '2025-09-30 22:31:49.555444', 'step': 15890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.586965', 'step': 15890, 'epoch': 3} {'type': 'loss', 'content': 0.08185377717018127, 'timestamp': '2025-09-30 22:31:49.596125', 'step': 15891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:49.634658', 'step': 15891, 'epoch': 3} {'type': 'loss', 'content': 0.16399995982646942, 'timestamp': '2025-09-30 22:31:49.659264', 'step': 15892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:49.705912', 'step': 15892, 'epoch': 3} {'type': 'loss', 'content': 0.08543115854263306, 'timestamp': '2025-09-30 22:31:49.710864', 'step': 15893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:49.744228', 'step': 15893, 'epoch': 3} {'type': 'loss', 'content': 0.0414573960006237, 'timestamp': '2025-09-30 22:31:49.747949', 'step': 15894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:49.780228', 'step': 15894, 'epoch': 3} {'type': 'loss', 'content': 0.09751322120428085, 'timestamp': '2025-09-30 22:31:49.782763', 'step': 15895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:49.815860', 'step': 15895, 'epoch': 3} {'type': 'loss', 'content': 0.0806407779455185, 'timestamp': '2025-09-30 22:31:49.841499', 'step': 15896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:49.872166', 'step': 15896, 'epoch': 3} {'type': 'loss', 'content': 0.049261100590229034, 'timestamp': '2025-09-30 22:31:49.877844', 'step': 15897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:49.910779', 'step': 15897, 'epoch': 3} {'type': 'loss', 'content': 0.11304860562086105, 'timestamp': '2025-09-30 22:31:49.914592', 'step': 15898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:49.947166', 'step': 15898, 'epoch': 3} {'type': 'loss', 'content': 0.06959357857704163, 'timestamp': '2025-09-30 22:31:49.952029', 'step': 15899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:49.997329', 'step': 15899, 'epoch': 3} {'type': 'loss', 'content': 0.05529501289129257, 'timestamp': '2025-09-30 22:31:50.022002', 'step': 15900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:50.053562', 'step': 15900, 'epoch': 3} {'type': 'loss', 'content': 0.11742991209030151, 'timestamp': '2025-09-30 22:31:50.059938', 'step': 15901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.099332', 'step': 15901, 'epoch': 3} {'type': 'loss', 'content': 0.0782306045293808, 'timestamp': '2025-09-30 22:31:50.102278', 'step': 15902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:50.132629', 'step': 15902, 'epoch': 3} {'type': 'loss', 'content': 0.023933954536914825, 'timestamp': '2025-09-30 22:31:50.138594', 'step': 15903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:50.179790', 'step': 15903, 'epoch': 3} {'type': 'loss', 'content': 0.07282676547765732, 'timestamp': '2025-09-30 22:31:50.206547', 'step': 15904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:50.240528', 'step': 15904, 'epoch': 3} {'type': 'loss', 'content': 0.10896994173526764, 'timestamp': '2025-09-30 22:31:50.244858', 'step': 15905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.278127', 'step': 15905, 'epoch': 3} {'type': 'loss', 'content': 0.05043676495552063, 'timestamp': '2025-09-30 22:31:50.283515', 'step': 15906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:50.317527', 'step': 15906, 'epoch': 3} {'type': 'loss', 'content': 0.05588080361485481, 'timestamp': '2025-09-30 22:31:50.338779', 'step': 15907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:50.371870', 'step': 15907, 'epoch': 3} {'type': 'loss', 'content': 0.07767389714717865, 'timestamp': '2025-09-30 22:31:50.398408', 'step': 15908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.441646', 'step': 15908, 'epoch': 3} {'type': 'loss', 'content': 0.13231828808784485, 'timestamp': '2025-09-30 22:31:50.444457', 'step': 15909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.475601', 'step': 15909, 'epoch': 3} {'type': 'loss', 'content': 0.09153696149587631, 'timestamp': '2025-09-30 22:31:50.479118', 'step': 15910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.509603', 'step': 15910, 'epoch': 3} {'type': 'loss', 'content': 0.032111842185258865, 'timestamp': '2025-09-30 22:31:50.522358', 'step': 15911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.564855', 'step': 15911, 'epoch': 3} {'type': 'loss', 'content': 0.0980076938867569, 'timestamp': '2025-09-30 22:31:50.589589', 'step': 15912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:50.621383', 'step': 15912, 'epoch': 3} {'type': 'loss', 'content': 0.04966628551483154, 'timestamp': '2025-09-30 22:31:50.624289', 'step': 15913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:50.655388', 'step': 15913, 'epoch': 3} {'type': 'loss', 'content': 0.08628631383180618, 'timestamp': '2025-09-30 22:31:50.665836', 'step': 15914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.696283', 'step': 15914, 'epoch': 3} {'type': 'loss', 'content': 0.0400846004486084, 'timestamp': '2025-09-30 22:31:50.711055', 'step': 15915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:50.750990', 'step': 15915, 'epoch': 3} {'type': 'loss', 'content': 0.1050378754734993, 'timestamp': '2025-09-30 22:31:50.777326', 'step': 15916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:50.810275', 'step': 15916, 'epoch': 3} {'type': 'loss', 'content': 0.057723335921764374, 'timestamp': '2025-09-30 22:31:50.826256', 'step': 15917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:50.872047', 'step': 15917, 'epoch': 3} {'type': 'loss', 'content': 0.14840105175971985, 'timestamp': '2025-09-30 22:31:50.876079', 'step': 15918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:50.923602', 'step': 15918, 'epoch': 3} {'type': 'loss', 'content': 0.11120060831308365, 'timestamp': '2025-09-30 22:31:50.927053', 'step': 15919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:50.959717', 'step': 15919, 'epoch': 3} {'type': 'loss', 'content': 0.04530034959316254, 'timestamp': '2025-09-30 22:31:50.997775', 'step': 15920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.029001', 'step': 15920, 'epoch': 3} {'type': 'loss', 'content': 0.06150554120540619, 'timestamp': '2025-09-30 22:31:51.043977', 'step': 15921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.086795', 'step': 15921, 'epoch': 3} {'type': 'loss', 'content': 0.06664112955331802, 'timestamp': '2025-09-30 22:31:51.093352', 'step': 15922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.134345', 'step': 15922, 'epoch': 3} {'type': 'loss', 'content': 0.14238187670707703, 'timestamp': '2025-09-30 22:31:51.144108', 'step': 15923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.174463', 'step': 15923, 'epoch': 3} {'type': 'loss', 'content': 0.05062251165509224, 'timestamp': '2025-09-30 22:31:51.206506', 'step': 15924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.237046', 'step': 15924, 'epoch': 3} {'type': 'loss', 'content': 0.05925992876291275, 'timestamp': '2025-09-30 22:31:51.242182', 'step': 15925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.281708', 'step': 15925, 'epoch': 3} {'type': 'loss', 'content': 0.06228560581803322, 'timestamp': '2025-09-30 22:31:51.284161', 'step': 15926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.315671', 'step': 15926, 'epoch': 3} {'type': 'loss', 'content': 0.08218729496002197, 'timestamp': '2025-09-30 22:31:51.319484', 'step': 15927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.350832', 'step': 15927, 'epoch': 3} {'type': 'loss', 'content': 0.05769772455096245, 'timestamp': '2025-09-30 22:31:51.375320', 'step': 15928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:51.416229', 'step': 15928, 'epoch': 3} {'type': 'loss', 'content': 0.07943767309188843, 'timestamp': '2025-09-30 22:31:51.419736', 'step': 15929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.452333', 'step': 15929, 'epoch': 3} {'type': 'loss', 'content': 0.05615504831075668, 'timestamp': '2025-09-30 22:31:51.456669', 'step': 15930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:51.492868', 'step': 15930, 'epoch': 3} {'type': 'loss', 'content': 0.17922991514205933, 'timestamp': '2025-09-30 22:31:51.498487', 'step': 15931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:51.529577', 'step': 15931, 'epoch': 3} {'type': 'loss', 'content': 0.05393745377659798, 'timestamp': '2025-09-30 22:31:51.565382', 'step': 15932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:31:51.595996', 'step': 15932, 'epoch': 3} {'type': 'loss', 'content': 0.10619740188121796, 'timestamp': '2025-09-30 22:31:51.616388', 'step': 15933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.660093', 'step': 15933, 'epoch': 3} {'type': 'loss', 'content': 0.05613633245229721, 'timestamp': '2025-09-30 22:31:51.663623', 'step': 15934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.695480', 'step': 15934, 'epoch': 3} {'type': 'loss', 'content': 0.07283156365156174, 'timestamp': '2025-09-30 22:31:51.705450', 'step': 15935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.736346', 'step': 15935, 'epoch': 3} {'type': 'loss', 'content': 0.08290421962738037, 'timestamp': '2025-09-30 22:31:51.760780', 'step': 15936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.792976', 'step': 15936, 'epoch': 3} {'type': 'loss', 'content': 0.02949795313179493, 'timestamp': '2025-09-30 22:31:51.802974', 'step': 15937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:51.840816', 'step': 15937, 'epoch': 3} {'type': 'loss', 'content': 0.06259673833847046, 'timestamp': '2025-09-30 22:31:51.844486', 'step': 15938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:51.875102', 'step': 15938, 'epoch': 3} {'type': 'loss', 'content': 0.05645105242729187, 'timestamp': '2025-09-30 22:31:51.879761', 'step': 15939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.919564', 'step': 15939, 'epoch': 3} {'type': 'loss', 'content': 0.11522817611694336, 'timestamp': '2025-09-30 22:31:51.945716', 'step': 15940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:51.977323', 'step': 15940, 'epoch': 3} {'type': 'loss', 'content': 0.0744299590587616, 'timestamp': '2025-09-30 22:31:51.980432', 'step': 15941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.017411', 'step': 15941, 'epoch': 3} {'type': 'loss', 'content': 0.1403479427099228, 'timestamp': '2025-09-30 22:31:52.032924', 'step': 15942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:52.064385', 'step': 15942, 'epoch': 3} {'type': 'loss', 'content': 0.09619463235139847, 'timestamp': '2025-09-30 22:31:52.082619', 'step': 15943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.127390', 'step': 15943, 'epoch': 3} {'type': 'loss', 'content': 0.045729491859674454, 'timestamp': '2025-09-30 22:31:52.153316', 'step': 15944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:52.185843', 'step': 15944, 'epoch': 3} {'type': 'loss', 'content': 0.08129310607910156, 'timestamp': '2025-09-30 22:31:52.198219', 'step': 15945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:52.235331', 'step': 15945, 'epoch': 3} {'type': 'loss', 'content': 0.0707443431019783, 'timestamp': '2025-09-30 22:31:52.239410', 'step': 15946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.270511', 'step': 15946, 'epoch': 3} {'type': 'loss', 'content': 0.059434711933135986, 'timestamp': '2025-09-30 22:31:52.274214', 'step': 15947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.312626', 'step': 15947, 'epoch': 3} {'type': 'loss', 'content': 0.1009969711303711, 'timestamp': '2025-09-30 22:31:52.337915', 'step': 15948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.371562', 'step': 15948, 'epoch': 3} {'type': 'loss', 'content': 0.12058083713054657, 'timestamp': '2025-09-30 22:31:52.381847', 'step': 15949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:31:52.415602', 'step': 15949, 'epoch': 3} {'type': 'loss', 'content': 0.043522268533706665, 'timestamp': '2025-09-30 22:31:52.426480', 'step': 15950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:52.458647', 'step': 15950, 'epoch': 3} {'type': 'loss', 'content': 0.1288217157125473, 'timestamp': '2025-09-30 22:31:52.462976', 'step': 15951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.495581', 'step': 15951, 'epoch': 3} {'type': 'loss', 'content': 0.04910891503095627, 'timestamp': '2025-09-30 22:31:52.523087', 'step': 15952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:52.556080', 'step': 15952, 'epoch': 3} {'type': 'loss', 'content': 0.17274898290634155, 'timestamp': '2025-09-30 22:31:52.565896', 'step': 15953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.604338', 'step': 15953, 'epoch': 3} {'type': 'loss', 'content': 0.0709092989563942, 'timestamp': '2025-09-30 22:31:52.608480', 'step': 15954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:52.641445', 'step': 15954, 'epoch': 3} {'type': 'loss', 'content': 0.11025609076023102, 'timestamp': '2025-09-30 22:31:52.660527', 'step': 15955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:52.715289', 'step': 15955, 'epoch': 3} {'type': 'loss', 'content': 0.05796913802623749, 'timestamp': '2025-09-30 22:31:52.747911', 'step': 15956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:52.780174', 'step': 15956, 'epoch': 3} {'type': 'loss', 'content': 0.07635462284088135, 'timestamp': '2025-09-30 22:31:52.784924', 'step': 15957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:52.816954', 'step': 15957, 'epoch': 3} {'type': 'loss', 'content': 0.06248319894075394, 'timestamp': '2025-09-30 22:31:52.832370', 'step': 15958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.871899', 'step': 15958, 'epoch': 3} {'type': 'loss', 'content': 0.05935103818774223, 'timestamp': '2025-09-30 22:31:52.875918', 'step': 15959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:52.911484', 'step': 15959, 'epoch': 3} {'type': 'loss', 'content': 0.049277275800704956, 'timestamp': '2025-09-30 22:31:52.936652', 'step': 15960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:52.967645', 'step': 15960, 'epoch': 3} {'type': 'loss', 'content': 0.13893651962280273, 'timestamp': '2025-09-30 22:31:52.971663', 'step': 15961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:53.003830', 'step': 15961, 'epoch': 3} {'type': 'loss', 'content': 0.0982123613357544, 'timestamp': '2025-09-30 22:31:53.007253', 'step': 15962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:53.039922', 'step': 15962, 'epoch': 3} {'type': 'loss', 'content': 0.05239177122712135, 'timestamp': '2025-09-30 22:31:53.043217', 'step': 15963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:53.074685', 'step': 15963, 'epoch': 3} {'type': 'loss', 'content': 0.12245123833417892, 'timestamp': '2025-09-30 22:31:53.102432', 'step': 15964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:53.139212', 'step': 15964, 'epoch': 3} {'type': 'loss', 'content': 0.12176428735256195, 'timestamp': '2025-09-30 22:31:53.143824', 'step': 15965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.182309', 'step': 15965, 'epoch': 3} {'type': 'loss', 'content': 0.02787795104086399, 'timestamp': '2025-09-30 22:31:53.194467', 'step': 15966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.232071', 'step': 15966, 'epoch': 3} {'type': 'loss', 'content': 0.13324569165706635, 'timestamp': '2025-09-30 22:31:53.244342', 'step': 15967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:53.276642', 'step': 15967, 'epoch': 3} {'type': 'loss', 'content': 0.058687400072813034, 'timestamp': '2025-09-30 22:31:53.312303', 'step': 15968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.342447', 'step': 15968, 'epoch': 3} {'type': 'loss', 'content': 0.0750700831413269, 'timestamp': '2025-09-30 22:31:53.346058', 'step': 15969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:53.378961', 'step': 15969, 'epoch': 3} {'type': 'loss', 'content': 0.05816615745425224, 'timestamp': '2025-09-30 22:31:53.382827', 'step': 15970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.414409', 'step': 15970, 'epoch': 3} {'type': 'loss', 'content': 0.07723250985145569, 'timestamp': '2025-09-30 22:31:53.418454', 'step': 15971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:53.454280', 'step': 15971, 'epoch': 3} {'type': 'loss', 'content': 0.05532693862915039, 'timestamp': '2025-09-30 22:31:53.481230', 'step': 15972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.512816', 'step': 15972, 'epoch': 3} {'type': 'loss', 'content': 0.09434068948030472, 'timestamp': '2025-09-30 22:31:53.523359', 'step': 15973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:53.556169', 'step': 15973, 'epoch': 3} {'type': 'loss', 'content': 0.06126461550593376, 'timestamp': '2025-09-30 22:31:53.561241', 'step': 15974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:53.593979', 'step': 15974, 'epoch': 3} {'type': 'loss', 'content': 0.06194007396697998, 'timestamp': '2025-09-30 22:31:53.605799', 'step': 15975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:31:53.645053', 'step': 15975, 'epoch': 3} {'type': 'loss', 'content': 0.1280144900083542, 'timestamp': '2025-09-30 22:31:53.671724', 'step': 15976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:53.710267', 'step': 15976, 'epoch': 3} {'type': 'loss', 'content': 0.17435675859451294, 'timestamp': '2025-09-30 22:31:53.713081', 'step': 15977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:53.752189', 'step': 15977, 'epoch': 3} {'type': 'loss', 'content': 0.036189984530210495, 'timestamp': '2025-09-30 22:31:53.755524', 'step': 15978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.787461', 'step': 15978, 'epoch': 3} {'type': 'loss', 'content': 0.07837516814470291, 'timestamp': '2025-09-30 22:31:53.792650', 'step': 15979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:53.825276', 'step': 15979, 'epoch': 3} {'type': 'loss', 'content': 0.08517526835203171, 'timestamp': '2025-09-30 22:31:53.856396', 'step': 15980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.888327', 'step': 15980, 'epoch': 3} {'type': 'loss', 'content': 0.20390628278255463, 'timestamp': '2025-09-30 22:31:53.898044', 'step': 15981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:53.936912', 'step': 15981, 'epoch': 3} {'type': 'loss', 'content': 0.06604544073343277, 'timestamp': '2025-09-30 22:31:53.941918', 'step': 15982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:53.973300', 'step': 15982, 'epoch': 3} {'type': 'loss', 'content': 0.042776450514793396, 'timestamp': '2025-09-30 22:31:53.976570', 'step': 15983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:54.009397', 'step': 15983, 'epoch': 3} {'type': 'loss', 'content': 0.06910405308008194, 'timestamp': '2025-09-30 22:31:54.034240', 'step': 15984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.078509', 'step': 15984, 'epoch': 3} {'type': 'loss', 'content': 0.041464563459157944, 'timestamp': '2025-09-30 22:31:54.088846', 'step': 15985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.128630', 'step': 15985, 'epoch': 3} {'type': 'loss', 'content': 0.08655131608247757, 'timestamp': '2025-09-30 22:31:54.137069', 'step': 15986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:31:54.178893', 'step': 15986, 'epoch': 3} {'type': 'loss', 'content': 0.07432664185762405, 'timestamp': '2025-09-30 22:31:54.182401', 'step': 15987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:54.221746', 'step': 15987, 'epoch': 3} {'type': 'loss', 'content': 0.07834417372941971, 'timestamp': '2025-09-30 22:31:54.250825', 'step': 15988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:54.286422', 'step': 15988, 'epoch': 3} {'type': 'loss', 'content': 0.11122199147939682, 'timestamp': '2025-09-30 22:31:54.289939', 'step': 15989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:54.320409', 'step': 15989, 'epoch': 3} {'type': 'loss', 'content': 0.09441796690225601, 'timestamp': '2025-09-30 22:31:54.324666', 'step': 15990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.370506', 'step': 15990, 'epoch': 3} {'type': 'loss', 'content': 0.08575655519962311, 'timestamp': '2025-09-30 22:31:54.374349', 'step': 15991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:31:54.420344', 'step': 15991, 'epoch': 3} {'type': 'loss', 'content': 0.04280418902635574, 'timestamp': '2025-09-30 22:31:54.447135', 'step': 15992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.488596', 'step': 15992, 'epoch': 3} {'type': 'loss', 'content': 0.13647550344467163, 'timestamp': '2025-09-30 22:31:54.494510', 'step': 15993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.527743', 'step': 15993, 'epoch': 3} {'type': 'loss', 'content': 0.07914629578590393, 'timestamp': '2025-09-30 22:31:54.532507', 'step': 15994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:54.564520', 'step': 15994, 'epoch': 3} {'type': 'loss', 'content': 0.08736004680395126, 'timestamp': '2025-09-30 22:31:54.569235', 'step': 15995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.603859', 'step': 15995, 'epoch': 3} {'type': 'loss', 'content': 0.06829820573329926, 'timestamp': '2025-09-30 22:31:54.635198', 'step': 15996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.666337', 'step': 15996, 'epoch': 3} {'type': 'loss', 'content': 0.10814859718084335, 'timestamp': '2025-09-30 22:31:54.669135', 'step': 15997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:31:54.702740', 'step': 15997, 'epoch': 3} {'type': 'loss', 'content': 0.03532476723194122, 'timestamp': '2025-09-30 22:31:54.706045', 'step': 15998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:54.738605', 'step': 15998, 'epoch': 3} {'type': 'loss', 'content': 0.029770854860544205, 'timestamp': '2025-09-30 22:31:54.746252', 'step': 15999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:31:54.777864', 'step': 15999, 'epoch': 3} {'type': 'loss', 'content': 0.15675410628318787, 'timestamp': '2025-09-30 22:31:54.802667', 'step': 16000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16000', 'timestamp': '2025-09-30 22:31:59.993292', 'step': 16000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:00.049759', 'step': 16000, 'epoch': 3} {'type': 'loss', 'content': 0.0435875728726387, 'timestamp': '2025-09-30 22:32:00.052057', 'step': 16001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:00.086883', 'step': 16001, 'epoch': 3} {'type': 'loss', 'content': 0.019346361979842186, 'timestamp': '2025-09-30 22:32:00.089853', 'step': 16002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:00.125645', 'step': 16002, 'epoch': 3} {'type': 'loss', 'content': 0.12477043271064758, 'timestamp': '2025-09-30 22:32:00.127965', 'step': 16003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:00.159103', 'step': 16003, 'epoch': 3} {'type': 'loss', 'content': 0.048009272664785385, 'timestamp': '2025-09-30 22:32:00.184369', 'step': 16004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:00.218332', 'step': 16004, 'epoch': 3} {'type': 'loss', 'content': 0.07533402740955353, 'timestamp': '2025-09-30 22:32:00.224500', 'step': 16005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:00.256080', 'step': 16005, 'epoch': 3} {'type': 'loss', 'content': 0.06363978236913681, 'timestamp': '2025-09-30 22:32:00.258646', 'step': 16006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:00.299371', 'step': 16006, 'epoch': 3} {'type': 'loss', 'content': 0.09537344425916672, 'timestamp': '2025-09-30 22:32:00.302460', 'step': 16007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:00.335434', 'step': 16007, 'epoch': 3} {'type': 'loss', 'content': 0.06048804149031639, 'timestamp': '2025-09-30 22:32:00.359591', 'step': 16008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:00.392155', 'step': 16008, 'epoch': 3} {'type': 'loss', 'content': 0.04336976259946823, 'timestamp': '2025-09-30 22:32:00.402353', 'step': 16009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:00.438341', 'step': 16009, 'epoch': 3} {'type': 'loss', 'content': 0.08798613399267197, 'timestamp': '2025-09-30 22:32:00.441467', 'step': 16010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:00.479653', 'step': 16010, 'epoch': 3} {'type': 'loss', 'content': 0.06581363081932068, 'timestamp': '2025-09-30 22:32:00.493421', 'step': 16011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:00.526672', 'step': 16011, 'epoch': 3} {'type': 'loss', 'content': 0.07232391089200974, 'timestamp': '2025-09-30 22:32:00.551871', 'step': 16012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:00.594619', 'step': 16012, 'epoch': 3} {'type': 'loss', 'content': 0.08225034177303314, 'timestamp': '2025-09-30 22:32:00.598680', 'step': 16013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:00.639834', 'step': 16013, 'epoch': 3} {'type': 'loss', 'content': 0.051800135523080826, 'timestamp': '2025-09-30 22:32:00.649289', 'step': 16014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:00.680506', 'step': 16014, 'epoch': 3} {'type': 'loss', 'content': 0.08270185440778732, 'timestamp': '2025-09-30 22:32:00.685048', 'step': 16015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:00.716637', 'step': 16015, 'epoch': 3} {'type': 'loss', 'content': 0.07039785385131836, 'timestamp': '2025-09-30 22:32:00.740783', 'step': 16016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:00.771248', 'step': 16016, 'epoch': 3} {'type': 'loss', 'content': 0.09320958703756332, 'timestamp': '2025-09-30 22:32:00.775289', 'step': 16017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:00.821162', 'step': 16017, 'epoch': 3} {'type': 'loss', 'content': 0.08944050967693329, 'timestamp': '2025-09-30 22:32:00.832767', 'step': 16018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:00.865649', 'step': 16018, 'epoch': 3} {'type': 'loss', 'content': 0.0707714855670929, 'timestamp': '2025-09-30 22:32:00.872365', 'step': 16019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:00.902951', 'step': 16019, 'epoch': 3} {'type': 'loss', 'content': 0.08203098922967911, 'timestamp': '2025-09-30 22:32:00.930853', 'step': 16020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:00.961875', 'step': 16020, 'epoch': 3} {'type': 'loss', 'content': 0.033369049429893494, 'timestamp': '2025-09-30 22:32:00.966692', 'step': 16021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:00.998637', 'step': 16021, 'epoch': 3} {'type': 'loss', 'content': 0.04939448460936546, 'timestamp': '2025-09-30 22:32:01.003536', 'step': 16022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:01.037156', 'step': 16022, 'epoch': 3} {'type': 'loss', 'content': 0.10338249057531357, 'timestamp': '2025-09-30 22:32:01.040937', 'step': 16023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:01.084663', 'step': 16023, 'epoch': 3} {'type': 'loss', 'content': 0.04925789684057236, 'timestamp': '2025-09-30 22:32:01.108592', 'step': 16024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.139463', 'step': 16024, 'epoch': 3} {'type': 'loss', 'content': 0.04869626462459564, 'timestamp': '2025-09-30 22:32:01.145978', 'step': 16025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.177511', 'step': 16025, 'epoch': 3} {'type': 'loss', 'content': 0.08953811228275299, 'timestamp': '2025-09-30 22:32:01.182847', 'step': 16026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:01.215909', 'step': 16026, 'epoch': 3} {'type': 'loss', 'content': 0.08403775840997696, 'timestamp': '2025-09-30 22:32:01.222533', 'step': 16027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:01.259706', 'step': 16027, 'epoch': 3} {'type': 'loss', 'content': 0.11490602791309357, 'timestamp': '2025-09-30 22:32:01.284453', 'step': 16028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.318101', 'step': 16028, 'epoch': 3} {'type': 'loss', 'content': 0.04945927858352661, 'timestamp': '2025-09-30 22:32:01.322271', 'step': 16029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:01.356412', 'step': 16029, 'epoch': 3} {'type': 'loss', 'content': 0.06620150804519653, 'timestamp': '2025-09-30 22:32:01.360371', 'step': 16030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:01.392412', 'step': 16030, 'epoch': 3} {'type': 'loss', 'content': 0.0874340683221817, 'timestamp': '2025-09-30 22:32:01.395992', 'step': 16031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:01.441275', 'step': 16031, 'epoch': 3} {'type': 'loss', 'content': 0.06576982140541077, 'timestamp': '2025-09-30 22:32:01.467076', 'step': 16032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:01.523761', 'step': 16032, 'epoch': 3} {'type': 'loss', 'content': 0.09656497836112976, 'timestamp': '2025-09-30 22:32:01.527784', 'step': 16033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:01.559177', 'step': 16033, 'epoch': 3} {'type': 'loss', 'content': 0.0688040629029274, 'timestamp': '2025-09-30 22:32:01.570884', 'step': 16034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:01.602491', 'step': 16034, 'epoch': 3} {'type': 'loss', 'content': 0.04096115380525589, 'timestamp': '2025-09-30 22:32:01.607153', 'step': 16035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.640282', 'step': 16035, 'epoch': 3} {'type': 'loss', 'content': 0.05915108323097229, 'timestamp': '2025-09-30 22:32:01.673913', 'step': 16036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.714317', 'step': 16036, 'epoch': 3} {'type': 'loss', 'content': 0.046413399279117584, 'timestamp': '2025-09-30 22:32:01.718081', 'step': 16037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:01.751988', 'step': 16037, 'epoch': 3} {'type': 'loss', 'content': 0.15515775978565216, 'timestamp': '2025-09-30 22:32:01.764131', 'step': 16038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.795959', 'step': 16038, 'epoch': 3} {'type': 'loss', 'content': 0.10565738379955292, 'timestamp': '2025-09-30 22:32:01.801547', 'step': 16039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:01.833896', 'step': 16039, 'epoch': 3} {'type': 'loss', 'content': 0.05794442072510719, 'timestamp': '2025-09-30 22:32:01.859524', 'step': 16040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:01.903570', 'step': 16040, 'epoch': 3} {'type': 'loss', 'content': 0.11525779217481613, 'timestamp': '2025-09-30 22:32:01.906952', 'step': 16041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.938774', 'step': 16041, 'epoch': 3} {'type': 'loss', 'content': 0.08188651502132416, 'timestamp': '2025-09-30 22:32:01.950245', 'step': 16042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:01.996156', 'step': 16042, 'epoch': 3} {'type': 'loss', 'content': 0.0568658746778965, 'timestamp': '2025-09-30 22:32:02.005019', 'step': 16043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:02.050033', 'step': 16043, 'epoch': 3} {'type': 'loss', 'content': 0.1366475522518158, 'timestamp': '2025-09-30 22:32:02.089086', 'step': 16044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:02.131392', 'step': 16044, 'epoch': 3} {'type': 'loss', 'content': 0.08106744289398193, 'timestamp': '2025-09-30 22:32:02.135278', 'step': 16045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.168478', 'step': 16045, 'epoch': 3} {'type': 'loss', 'content': 0.0930589884519577, 'timestamp': '2025-09-30 22:32:02.172161', 'step': 16046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:02.205677', 'step': 16046, 'epoch': 3} {'type': 'loss', 'content': 0.0857747420668602, 'timestamp': '2025-09-30 22:32:02.219185', 'step': 16047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.250503', 'step': 16047, 'epoch': 3} {'type': 'loss', 'content': 0.08697690069675446, 'timestamp': '2025-09-30 22:32:02.275642', 'step': 16048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.306267', 'step': 16048, 'epoch': 3} {'type': 'loss', 'content': 0.06328108161687851, 'timestamp': '2025-09-30 22:32:02.310673', 'step': 16049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:02.342935', 'step': 16049, 'epoch': 3} {'type': 'loss', 'content': 0.07024435698986053, 'timestamp': '2025-09-30 22:32:02.352592', 'step': 16050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:02.389705', 'step': 16050, 'epoch': 3} {'type': 'loss', 'content': 0.10539305210113525, 'timestamp': '2025-09-30 22:32:02.394849', 'step': 16051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.428554', 'step': 16051, 'epoch': 3} {'type': 'loss', 'content': 0.06819712370634079, 'timestamp': '2025-09-30 22:32:02.455025', 'step': 16052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:02.499541', 'step': 16052, 'epoch': 3} {'type': 'loss', 'content': 0.14251475036144257, 'timestamp': '2025-09-30 22:32:02.514995', 'step': 16053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:02.546714', 'step': 16053, 'epoch': 3} {'type': 'loss', 'content': 0.009501704014837742, 'timestamp': '2025-09-30 22:32:02.553911', 'step': 16054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:02.586015', 'step': 16054, 'epoch': 3} {'type': 'loss', 'content': 0.06757309287786484, 'timestamp': '2025-09-30 22:32:02.589946', 'step': 16055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:02.621816', 'step': 16055, 'epoch': 3} {'type': 'loss', 'content': 0.15328098833560944, 'timestamp': '2025-09-30 22:32:02.648073', 'step': 16056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.681258', 'step': 16056, 'epoch': 3} {'type': 'loss', 'content': 0.058800626546144485, 'timestamp': '2025-09-30 22:32:02.705023', 'step': 16057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.749418', 'step': 16057, 'epoch': 3} {'type': 'loss', 'content': 0.08627038449048996, 'timestamp': '2025-09-30 22:32:02.762016', 'step': 16058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:02.792782', 'step': 16058, 'epoch': 3} {'type': 'loss', 'content': 0.08530466258525848, 'timestamp': '2025-09-30 22:32:02.796169', 'step': 16059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:02.838252', 'step': 16059, 'epoch': 3} {'type': 'loss', 'content': 0.045343175530433655, 'timestamp': '2025-09-30 22:32:02.871989', 'step': 16060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:02.912065', 'step': 16060, 'epoch': 3} {'type': 'loss', 'content': 0.04889480397105217, 'timestamp': '2025-09-30 22:32:02.915407', 'step': 16061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:02.953444', 'step': 16061, 'epoch': 3} {'type': 'loss', 'content': 0.09054882824420929, 'timestamp': '2025-09-30 22:32:02.956683', 'step': 16062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:02.998279', 'step': 16062, 'epoch': 3} {'type': 'loss', 'content': 0.08682326227426529, 'timestamp': '2025-09-30 22:32:03.002666', 'step': 16063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.052986', 'step': 16063, 'epoch': 3} {'type': 'loss', 'content': 0.08990693837404251, 'timestamp': '2025-09-30 22:32:03.079291', 'step': 16064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:32:03.126556', 'step': 16064, 'epoch': 3} {'type': 'loss', 'content': 0.08249345421791077, 'timestamp': '2025-09-30 22:32:03.131933', 'step': 16065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.173685', 'step': 16065, 'epoch': 3} {'type': 'loss', 'content': 0.03897467255592346, 'timestamp': '2025-09-30 22:32:03.178419', 'step': 16066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.221635', 'step': 16066, 'epoch': 3} {'type': 'loss', 'content': 0.046766940504312515, 'timestamp': '2025-09-30 22:32:03.238487', 'step': 16067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:03.272368', 'step': 16067, 'epoch': 3} {'type': 'loss', 'content': 0.045192040503025055, 'timestamp': '2025-09-30 22:32:03.305161', 'step': 16068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:03.350401', 'step': 16068, 'epoch': 3} {'type': 'loss', 'content': 0.09368423372507095, 'timestamp': '2025-09-30 22:32:03.353769', 'step': 16069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.394627', 'step': 16069, 'epoch': 3} {'type': 'loss', 'content': 0.07899712771177292, 'timestamp': '2025-09-30 22:32:03.399852', 'step': 16070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:03.433188', 'step': 16070, 'epoch': 3} {'type': 'loss', 'content': 0.06749754399061203, 'timestamp': '2025-09-30 22:32:03.448754', 'step': 16071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:03.482724', 'step': 16071, 'epoch': 3} {'type': 'loss', 'content': 0.04514748603105545, 'timestamp': '2025-09-30 22:32:03.509550', 'step': 16072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.551105', 'step': 16072, 'epoch': 3} {'type': 'loss', 'content': 0.08012721687555313, 'timestamp': '2025-09-30 22:32:03.554360', 'step': 16073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.586539', 'step': 16073, 'epoch': 3} {'type': 'loss', 'content': 0.07132390886545181, 'timestamp': '2025-09-30 22:32:03.592583', 'step': 16074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.626123', 'step': 16074, 'epoch': 3} {'type': 'loss', 'content': 0.06973636150360107, 'timestamp': '2025-09-30 22:32:03.632116', 'step': 16075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:32:03.679028', 'step': 16075, 'epoch': 3} {'type': 'loss', 'content': 0.02758592925965786, 'timestamp': '2025-09-30 22:32:03.707181', 'step': 16076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:03.739503', 'step': 16076, 'epoch': 3} {'type': 'loss', 'content': 0.07395191490650177, 'timestamp': '2025-09-30 22:32:03.745659', 'step': 16077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:03.781227', 'step': 16077, 'epoch': 3} {'type': 'loss', 'content': 0.1096261739730835, 'timestamp': '2025-09-30 22:32:03.786239', 'step': 16078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:03.821063', 'step': 16078, 'epoch': 3} {'type': 'loss', 'content': 0.029171792790293694, 'timestamp': '2025-09-30 22:32:03.826281', 'step': 16079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:03.864460', 'step': 16079, 'epoch': 3} {'type': 'loss', 'content': 0.05607559159398079, 'timestamp': '2025-09-30 22:32:03.890007', 'step': 16080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:03.924460', 'step': 16080, 'epoch': 3} {'type': 'loss', 'content': 0.10530252754688263, 'timestamp': '2025-09-30 22:32:03.928706', 'step': 16081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:03.962561', 'step': 16081, 'epoch': 3} {'type': 'loss', 'content': 0.1302894651889801, 'timestamp': '2025-09-30 22:32:03.965635', 'step': 16082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:04.000321', 'step': 16082, 'epoch': 3} {'type': 'loss', 'content': 0.039963629096746445, 'timestamp': '2025-09-30 22:32:04.010985', 'step': 16083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:04.043278', 'step': 16083, 'epoch': 3} {'type': 'loss', 'content': 0.024235233664512634, 'timestamp': '2025-09-30 22:32:04.077018', 'step': 16084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:04.109409', 'step': 16084, 'epoch': 3} {'type': 'loss', 'content': 0.09667760133743286, 'timestamp': '2025-09-30 22:32:04.114636', 'step': 16085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.151641', 'step': 16085, 'epoch': 3} {'type': 'loss', 'content': 0.0481758676469326, 'timestamp': '2025-09-30 22:32:04.156982', 'step': 16086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.191702', 'step': 16086, 'epoch': 3} {'type': 'loss', 'content': 0.07400315999984741, 'timestamp': '2025-09-30 22:32:04.195095', 'step': 16087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:04.230340', 'step': 16087, 'epoch': 3} {'type': 'loss', 'content': 0.025659190490841866, 'timestamp': '2025-09-30 22:32:04.255183', 'step': 16088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.289342', 'step': 16088, 'epoch': 3} {'type': 'loss', 'content': 0.06310645490884781, 'timestamp': '2025-09-30 22:32:04.292083', 'step': 16089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.322805', 'step': 16089, 'epoch': 3} {'type': 'loss', 'content': 0.15098680555820465, 'timestamp': '2025-09-30 22:32:04.334382', 'step': 16090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:04.372906', 'step': 16090, 'epoch': 3} {'type': 'loss', 'content': 0.0866968110203743, 'timestamp': '2025-09-30 22:32:04.377447', 'step': 16091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:04.411220', 'step': 16091, 'epoch': 3} {'type': 'loss', 'content': 0.07970453798770905, 'timestamp': '2025-09-30 22:32:04.436837', 'step': 16092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:04.470667', 'step': 16092, 'epoch': 3} {'type': 'loss', 'content': 0.025550255551934242, 'timestamp': '2025-09-30 22:32:04.481545', 'step': 16093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:04.513746', 'step': 16093, 'epoch': 3} {'type': 'loss', 'content': 0.08629357814788818, 'timestamp': '2025-09-30 22:32:04.519452', 'step': 16094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.560833', 'step': 16094, 'epoch': 3} {'type': 'loss', 'content': 0.0831611305475235, 'timestamp': '2025-09-30 22:32:04.571824', 'step': 16095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:04.614272', 'step': 16095, 'epoch': 3} {'type': 'loss', 'content': 0.13009004294872284, 'timestamp': '2025-09-30 22:32:04.640885', 'step': 16096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:04.680777', 'step': 16096, 'epoch': 3} {'type': 'loss', 'content': 0.042120348662137985, 'timestamp': '2025-09-30 22:32:04.695040', 'step': 16097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:04.742469', 'step': 16097, 'epoch': 3} {'type': 'loss', 'content': 0.08302758634090424, 'timestamp': '2025-09-30 22:32:04.756165', 'step': 16098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.801865', 'step': 16098, 'epoch': 3} {'type': 'loss', 'content': 0.08253151178359985, 'timestamp': '2025-09-30 22:32:04.813263', 'step': 16099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:04.844828', 'step': 16099, 'epoch': 3} {'type': 'loss', 'content': 0.09592937678098679, 'timestamp': '2025-09-30 22:32:04.870102', 'step': 16100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:04.917269', 'step': 16100, 'epoch': 3} {'type': 'loss', 'content': 0.0639161542057991, 'timestamp': '2025-09-30 22:32:04.934611', 'step': 16101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:04.969642', 'step': 16101, 'epoch': 3} {'type': 'loss', 'content': 0.07292360812425613, 'timestamp': '2025-09-30 22:32:04.973475', 'step': 16102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.007467', 'step': 16102, 'epoch': 3} {'type': 'loss', 'content': 0.093074269592762, 'timestamp': '2025-09-30 22:32:05.011813', 'step': 16103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:05.045601', 'step': 16103, 'epoch': 3} {'type': 'loss', 'content': 0.050520408898591995, 'timestamp': '2025-09-30 22:32:05.070783', 'step': 16104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:05.103705', 'step': 16104, 'epoch': 3} {'type': 'loss', 'content': 0.021878033876419067, 'timestamp': '2025-09-30 22:32:05.107506', 'step': 16105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.140884', 'step': 16105, 'epoch': 3} {'type': 'loss', 'content': 0.03723221644759178, 'timestamp': '2025-09-30 22:32:05.146051', 'step': 16106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:05.179452', 'step': 16106, 'epoch': 3} {'type': 'loss', 'content': 0.08969427645206451, 'timestamp': '2025-09-30 22:32:05.183237', 'step': 16107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.218063', 'step': 16107, 'epoch': 3} {'type': 'loss', 'content': 0.08551806956529617, 'timestamp': '2025-09-30 22:32:05.243010', 'step': 16108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.275281', 'step': 16108, 'epoch': 3} {'type': 'loss', 'content': 0.022623401135206223, 'timestamp': '2025-09-30 22:32:05.280413', 'step': 16109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:05.320984', 'step': 16109, 'epoch': 3} {'type': 'loss', 'content': 0.0502527616918087, 'timestamp': '2025-09-30 22:32:05.325058', 'step': 16110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:05.358391', 'step': 16110, 'epoch': 3} {'type': 'loss', 'content': 0.0859161838889122, 'timestamp': '2025-09-30 22:32:05.361224', 'step': 16111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:05.401417', 'step': 16111, 'epoch': 3} {'type': 'loss', 'content': 0.052223239094018936, 'timestamp': '2025-09-30 22:32:05.426409', 'step': 16112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.457436', 'step': 16112, 'epoch': 3} {'type': 'loss', 'content': 0.12388629466295242, 'timestamp': '2025-09-30 22:32:05.462579', 'step': 16113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.495031', 'step': 16113, 'epoch': 3} {'type': 'loss', 'content': 0.10270749777555466, 'timestamp': '2025-09-30 22:32:05.516214', 'step': 16114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.550058', 'step': 16114, 'epoch': 3} {'type': 'loss', 'content': 0.058454662561416626, 'timestamp': '2025-09-30 22:32:05.554659', 'step': 16115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.593759', 'step': 16115, 'epoch': 3} {'type': 'loss', 'content': 0.049636535346508026, 'timestamp': '2025-09-30 22:32:05.618313', 'step': 16116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.650607', 'step': 16116, 'epoch': 3} {'type': 'loss', 'content': 0.06595944613218307, 'timestamp': '2025-09-30 22:32:05.653535', 'step': 16117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.684875', 'step': 16117, 'epoch': 3} {'type': 'loss', 'content': 0.08527850359678268, 'timestamp': '2025-09-30 22:32:05.688232', 'step': 16118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.734065', 'step': 16118, 'epoch': 3} {'type': 'loss', 'content': 0.023615481331944466, 'timestamp': '2025-09-30 22:32:05.737541', 'step': 16119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:05.767480', 'step': 16119, 'epoch': 3} {'type': 'loss', 'content': 0.07571378350257874, 'timestamp': '2025-09-30 22:32:05.792490', 'step': 16120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:05.827674', 'step': 16120, 'epoch': 3} {'type': 'loss', 'content': 0.013806529343128204, 'timestamp': '2025-09-30 22:32:05.832160', 'step': 16121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:05.874857', 'step': 16121, 'epoch': 3} {'type': 'loss', 'content': 0.037565577775239944, 'timestamp': '2025-09-30 22:32:05.888686', 'step': 16122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:05.921597', 'step': 16122, 'epoch': 3} {'type': 'loss', 'content': 0.05644211545586586, 'timestamp': '2025-09-30 22:32:05.933126', 'step': 16123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:05.973953', 'step': 16123, 'epoch': 3} {'type': 'loss', 'content': 0.08059398829936981, 'timestamp': '2025-09-30 22:32:06.000949', 'step': 16124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.032782', 'step': 16124, 'epoch': 3} {'type': 'loss', 'content': 0.05675589293241501, 'timestamp': '2025-09-30 22:32:06.036278', 'step': 16125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:06.069962', 'step': 16125, 'epoch': 3} {'type': 'loss', 'content': 0.15768377482891083, 'timestamp': '2025-09-30 22:32:06.074103', 'step': 16126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.108107', 'step': 16126, 'epoch': 3} {'type': 'loss', 'content': 0.07194824516773224, 'timestamp': '2025-09-30 22:32:06.111874', 'step': 16127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.150565', 'step': 16127, 'epoch': 3} {'type': 'loss', 'content': 0.05894596874713898, 'timestamp': '2025-09-30 22:32:06.175362', 'step': 16128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:06.206796', 'step': 16128, 'epoch': 3} {'type': 'loss', 'content': 0.07720424979925156, 'timestamp': '2025-09-30 22:32:06.211225', 'step': 16129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:06.245381', 'step': 16129, 'epoch': 3} {'type': 'loss', 'content': 0.14741668105125427, 'timestamp': '2025-09-30 22:32:06.249856', 'step': 16130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.293257', 'step': 16130, 'epoch': 3} {'type': 'loss', 'content': 0.02997433766722679, 'timestamp': '2025-09-30 22:32:06.296615', 'step': 16131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:32:06.328891', 'step': 16131, 'epoch': 3} {'type': 'loss', 'content': 0.10346859693527222, 'timestamp': '2025-09-30 22:32:06.359720', 'step': 16132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:06.391523', 'step': 16132, 'epoch': 3} {'type': 'loss', 'content': 0.05359085649251938, 'timestamp': '2025-09-30 22:32:06.396496', 'step': 16133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:06.436308', 'step': 16133, 'epoch': 3} {'type': 'loss', 'content': 0.05709734559059143, 'timestamp': '2025-09-30 22:32:06.440494', 'step': 16134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.473427', 'step': 16134, 'epoch': 3} {'type': 'loss', 'content': 0.0944192036986351, 'timestamp': '2025-09-30 22:32:06.484134', 'step': 16135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.520763', 'step': 16135, 'epoch': 3} {'type': 'loss', 'content': 0.0580022819340229, 'timestamp': '2025-09-30 22:32:06.546476', 'step': 16136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.583835', 'step': 16136, 'epoch': 3} {'type': 'loss', 'content': 0.09239447116851807, 'timestamp': '2025-09-30 22:32:06.598668', 'step': 16137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.632491', 'step': 16137, 'epoch': 3} {'type': 'loss', 'content': 0.07605487108230591, 'timestamp': '2025-09-30 22:32:06.648687', 'step': 16138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:06.681299', 'step': 16138, 'epoch': 3} {'type': 'loss', 'content': 0.06871095299720764, 'timestamp': '2025-09-30 22:32:06.686751', 'step': 16139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:06.718356', 'step': 16139, 'epoch': 3} {'type': 'loss', 'content': 0.06944790482521057, 'timestamp': '2025-09-30 22:32:06.742726', 'step': 16140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:06.781173', 'step': 16140, 'epoch': 3} {'type': 'loss', 'content': 0.10139992088079453, 'timestamp': '2025-09-30 22:32:06.790848', 'step': 16141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:06.822507', 'step': 16141, 'epoch': 3} {'type': 'loss', 'content': 0.06436166912317276, 'timestamp': '2025-09-30 22:32:06.826238', 'step': 16142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.858202', 'step': 16142, 'epoch': 3} {'type': 'loss', 'content': 0.03750111162662506, 'timestamp': '2025-09-30 22:32:06.862357', 'step': 16143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:06.899063', 'step': 16143, 'epoch': 3} {'type': 'loss', 'content': 0.042558491230010986, 'timestamp': '2025-09-30 22:32:06.927940', 'step': 16144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:06.959085', 'step': 16144, 'epoch': 3} {'type': 'loss', 'content': 0.06672298908233643, 'timestamp': '2025-09-30 22:32:06.963716', 'step': 16145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.005769', 'step': 16145, 'epoch': 3} {'type': 'loss', 'content': 0.13958632946014404, 'timestamp': '2025-09-30 22:32:07.009344', 'step': 16146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:07.043900', 'step': 16146, 'epoch': 3} {'type': 'loss', 'content': 0.11203364282846451, 'timestamp': '2025-09-30 22:32:07.046687', 'step': 16147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:07.086293', 'step': 16147, 'epoch': 3} {'type': 'loss', 'content': 0.06344082951545715, 'timestamp': '2025-09-30 22:32:07.111720', 'step': 16148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.149158', 'step': 16148, 'epoch': 3} {'type': 'loss', 'content': 0.031248776242136955, 'timestamp': '2025-09-30 22:32:07.155031', 'step': 16149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:07.190412', 'step': 16149, 'epoch': 3} {'type': 'loss', 'content': 0.11781910806894302, 'timestamp': '2025-09-30 22:32:07.193917', 'step': 16150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:07.234695', 'step': 16150, 'epoch': 3} {'type': 'loss', 'content': 0.18933764100074768, 'timestamp': '2025-09-30 22:32:07.245513', 'step': 16151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:07.278778', 'step': 16151, 'epoch': 3} {'type': 'loss', 'content': 0.1151953935623169, 'timestamp': '2025-09-30 22:32:07.303903', 'step': 16152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.337914', 'step': 16152, 'epoch': 3} {'type': 'loss', 'content': 0.060082998126745224, 'timestamp': '2025-09-30 22:32:07.340663', 'step': 16153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:07.377328', 'step': 16153, 'epoch': 3} {'type': 'loss', 'content': 0.0758756473660469, 'timestamp': '2025-09-30 22:32:07.380443', 'step': 16154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.413849', 'step': 16154, 'epoch': 3} {'type': 'loss', 'content': 0.04953496903181076, 'timestamp': '2025-09-30 22:32:07.417145', 'step': 16155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.448571', 'step': 16155, 'epoch': 3} {'type': 'loss', 'content': 0.03262937441468239, 'timestamp': '2025-09-30 22:32:07.473192', 'step': 16156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:07.509625', 'step': 16156, 'epoch': 3} {'type': 'loss', 'content': 0.12509623169898987, 'timestamp': '2025-09-30 22:32:07.527345', 'step': 16157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 272], 'flops': 8068526078144}, 'timestamp': '2025-09-30 22:32:07.561714', 'step': 16157, 'epoch': 3} {'type': 'loss', 'content': 0.07300613075494766, 'timestamp': '2025-09-30 22:32:07.572215', 'step': 16158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:07.610727', 'step': 16158, 'epoch': 3} {'type': 'loss', 'content': 0.07454437017440796, 'timestamp': '2025-09-30 22:32:07.615596', 'step': 16159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.651225', 'step': 16159, 'epoch': 3} {'type': 'loss', 'content': 0.06693314760923386, 'timestamp': '2025-09-30 22:32:07.684721', 'step': 16160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:07.728334', 'step': 16160, 'epoch': 3} {'type': 'loss', 'content': 0.12934468686580658, 'timestamp': '2025-09-30 22:32:07.733274', 'step': 16161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:07.771320', 'step': 16161, 'epoch': 3} {'type': 'loss', 'content': 0.06946590542793274, 'timestamp': '2025-09-30 22:32:07.776069', 'step': 16162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:07.808762', 'step': 16162, 'epoch': 3} {'type': 'loss', 'content': 0.051596734672784805, 'timestamp': '2025-09-30 22:32:07.813433', 'step': 16163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.848757', 'step': 16163, 'epoch': 3} {'type': 'loss', 'content': 0.09240402281284332, 'timestamp': '2025-09-30 22:32:07.898907', 'step': 16164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:07.944676', 'step': 16164, 'epoch': 3} {'type': 'loss', 'content': 0.06261277943849564, 'timestamp': '2025-09-30 22:32:07.949353', 'step': 16165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:07.986279', 'step': 16165, 'epoch': 3} {'type': 'loss', 'content': 0.039567265659570694, 'timestamp': '2025-09-30 22:32:08.002582', 'step': 16166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:08.037180', 'step': 16166, 'epoch': 3} {'type': 'loss', 'content': 0.06864948570728302, 'timestamp': '2025-09-30 22:32:08.044517', 'step': 16167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.077787', 'step': 16167, 'epoch': 3} {'type': 'loss', 'content': 0.04604952782392502, 'timestamp': '2025-09-30 22:32:08.108386', 'step': 16168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.140338', 'step': 16168, 'epoch': 3} {'type': 'loss', 'content': 0.052754733711481094, 'timestamp': '2025-09-30 22:32:08.147724', 'step': 16169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.179165', 'step': 16169, 'epoch': 3} {'type': 'loss', 'content': 0.09843378514051437, 'timestamp': '2025-09-30 22:32:08.183287', 'step': 16170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:08.215265', 'step': 16170, 'epoch': 3} {'type': 'loss', 'content': 0.05144093558192253, 'timestamp': '2025-09-30 22:32:08.218621', 'step': 16171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:08.251002', 'step': 16171, 'epoch': 3} {'type': 'loss', 'content': 0.08487922698259354, 'timestamp': '2025-09-30 22:32:08.276000', 'step': 16172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.307216', 'step': 16172, 'epoch': 3} {'type': 'loss', 'content': 0.0939256101846695, 'timestamp': '2025-09-30 22:32:08.309599', 'step': 16173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.340495', 'step': 16173, 'epoch': 3} {'type': 'loss', 'content': 0.06787387281656265, 'timestamp': '2025-09-30 22:32:08.346302', 'step': 16174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.377415', 'step': 16174, 'epoch': 3} {'type': 'loss', 'content': 0.041078660637140274, 'timestamp': '2025-09-30 22:32:08.383449', 'step': 16175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:08.416263', 'step': 16175, 'epoch': 3} {'type': 'loss', 'content': 0.09746112674474716, 'timestamp': '2025-09-30 22:32:08.443733', 'step': 16176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.475823', 'step': 16176, 'epoch': 3} {'type': 'loss', 'content': 0.04555322602391243, 'timestamp': '2025-09-30 22:32:08.478400', 'step': 16177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.510772', 'step': 16177, 'epoch': 3} {'type': 'loss', 'content': 0.1219920665025711, 'timestamp': '2025-09-30 22:32:08.513768', 'step': 16178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.548767', 'step': 16178, 'epoch': 3} {'type': 'loss', 'content': 0.13261646032333374, 'timestamp': '2025-09-30 22:32:08.554859', 'step': 16179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.590094', 'step': 16179, 'epoch': 3} {'type': 'loss', 'content': 0.10997134447097778, 'timestamp': '2025-09-30 22:32:08.616405', 'step': 16180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:08.649276', 'step': 16180, 'epoch': 3} {'type': 'loss', 'content': 0.053405966609716415, 'timestamp': '2025-09-30 22:32:08.659845', 'step': 16181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:08.701659', 'step': 16181, 'epoch': 3} {'type': 'loss', 'content': 0.15062257647514343, 'timestamp': '2025-09-30 22:32:08.706774', 'step': 16182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.741284', 'step': 16182, 'epoch': 3} {'type': 'loss', 'content': 0.07227851450443268, 'timestamp': '2025-09-30 22:32:08.746580', 'step': 16183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.780193', 'step': 16183, 'epoch': 3} {'type': 'loss', 'content': 0.0489288754761219, 'timestamp': '2025-09-30 22:32:08.804097', 'step': 16184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.834902', 'step': 16184, 'epoch': 3} {'type': 'loss', 'content': 0.037906449288129807, 'timestamp': '2025-09-30 22:32:08.837559', 'step': 16185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:08.884932', 'step': 16185, 'epoch': 3} {'type': 'loss', 'content': 0.0391337089240551, 'timestamp': '2025-09-30 22:32:08.888117', 'step': 16186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:08.919680', 'step': 16186, 'epoch': 3} {'type': 'loss', 'content': 0.15326116979122162, 'timestamp': '2025-09-30 22:32:08.924143', 'step': 16187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:08.955086', 'step': 16187, 'epoch': 3} {'type': 'loss', 'content': 0.01859353482723236, 'timestamp': '2025-09-30 22:32:08.979207', 'step': 16188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.017395', 'step': 16188, 'epoch': 3} {'type': 'loss', 'content': 0.06969713419675827, 'timestamp': '2025-09-30 22:32:09.021047', 'step': 16189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:09.053896', 'step': 16189, 'epoch': 3} {'type': 'loss', 'content': 0.05845164880156517, 'timestamp': '2025-09-30 22:32:09.058076', 'step': 16190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:09.089082', 'step': 16190, 'epoch': 3} {'type': 'loss', 'content': 0.07908892631530762, 'timestamp': '2025-09-30 22:32:09.099124', 'step': 16191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:09.134816', 'step': 16191, 'epoch': 3} {'type': 'loss', 'content': 0.05077395215630531, 'timestamp': '2025-09-30 22:32:09.160453', 'step': 16192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:09.191406', 'step': 16192, 'epoch': 3} {'type': 'loss', 'content': 0.10988069325685501, 'timestamp': '2025-09-30 22:32:09.195337', 'step': 16193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.226948', 'step': 16193, 'epoch': 3} {'type': 'loss', 'content': 0.056245289742946625, 'timestamp': '2025-09-30 22:32:09.231331', 'step': 16194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.262631', 'step': 16194, 'epoch': 3} {'type': 'loss', 'content': 0.06311258673667908, 'timestamp': '2025-09-30 22:32:09.266462', 'step': 16195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:09.300178', 'step': 16195, 'epoch': 3} {'type': 'loss', 'content': 0.09182342886924744, 'timestamp': '2025-09-30 22:32:09.331573', 'step': 16196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.364948', 'step': 16196, 'epoch': 3} {'type': 'loss', 'content': 0.10709743946790695, 'timestamp': '2025-09-30 22:32:09.371629', 'step': 16197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:09.409674', 'step': 16197, 'epoch': 3} {'type': 'loss', 'content': 0.07333997637033463, 'timestamp': '2025-09-30 22:32:09.414727', 'step': 16198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:09.449872', 'step': 16198, 'epoch': 3} {'type': 'loss', 'content': 0.07237392663955688, 'timestamp': '2025-09-30 22:32:09.453638', 'step': 16199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:09.486233', 'step': 16199, 'epoch': 3} {'type': 'loss', 'content': 0.06308212131261826, 'timestamp': '2025-09-30 22:32:09.511793', 'step': 16200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:09.545354', 'step': 16200, 'epoch': 3} {'type': 'loss', 'content': 0.10862470418214798, 'timestamp': '2025-09-30 22:32:09.558998', 'step': 16201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.604050', 'step': 16201, 'epoch': 3} {'type': 'loss', 'content': 0.07674148678779602, 'timestamp': '2025-09-30 22:32:09.607388', 'step': 16202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:09.638549', 'step': 16202, 'epoch': 3} {'type': 'loss', 'content': 0.12883365154266357, 'timestamp': '2025-09-30 22:32:09.640871', 'step': 16203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.670538', 'step': 16203, 'epoch': 3} {'type': 'loss', 'content': 0.08724263310432434, 'timestamp': '2025-09-30 22:32:09.695492', 'step': 16204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:09.733158', 'step': 16204, 'epoch': 3} {'type': 'loss', 'content': 0.028747379779815674, 'timestamp': '2025-09-30 22:32:09.738273', 'step': 16205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.781336', 'step': 16205, 'epoch': 3} {'type': 'loss', 'content': 0.1118636429309845, 'timestamp': '2025-09-30 22:32:09.784253', 'step': 16206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.817324', 'step': 16206, 'epoch': 3} {'type': 'loss', 'content': 0.14649662375450134, 'timestamp': '2025-09-30 22:32:09.820691', 'step': 16207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.884011', 'step': 16207, 'epoch': 3} {'type': 'loss', 'content': 0.04681381583213806, 'timestamp': '2025-09-30 22:32:09.909894', 'step': 16208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:09.944772', 'step': 16208, 'epoch': 3} {'type': 'loss', 'content': 0.0714944452047348, 'timestamp': '2025-09-30 22:32:09.947624', 'step': 16209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:09.986700', 'step': 16209, 'epoch': 3} {'type': 'loss', 'content': 0.07538491487503052, 'timestamp': '2025-09-30 22:32:09.992243', 'step': 16210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:10.028803', 'step': 16210, 'epoch': 3} {'type': 'loss', 'content': 0.10246167331933975, 'timestamp': '2025-09-30 22:32:10.035705', 'step': 16211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:10.076530', 'step': 16211, 'epoch': 3} {'type': 'loss', 'content': 0.03888220712542534, 'timestamp': '2025-09-30 22:32:10.103355', 'step': 16212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.141489', 'step': 16212, 'epoch': 3} {'type': 'loss', 'content': 0.05964197590947151, 'timestamp': '2025-09-30 22:32:10.145416', 'step': 16213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:10.177578', 'step': 16213, 'epoch': 3} {'type': 'loss', 'content': 0.061236537992954254, 'timestamp': '2025-09-30 22:32:10.183501', 'step': 16214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:10.219498', 'step': 16214, 'epoch': 3} {'type': 'loss', 'content': 0.05745428428053856, 'timestamp': '2025-09-30 22:32:10.224454', 'step': 16215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.277371', 'step': 16215, 'epoch': 3} {'type': 'loss', 'content': 0.10145077854394913, 'timestamp': '2025-09-30 22:32:10.303752', 'step': 16216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.335282', 'step': 16216, 'epoch': 3} {'type': 'loss', 'content': 0.024649979546666145, 'timestamp': '2025-09-30 22:32:10.340562', 'step': 16217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.392274', 'step': 16217, 'epoch': 3} {'type': 'loss', 'content': 0.05741715431213379, 'timestamp': '2025-09-30 22:32:10.396320', 'step': 16218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.428745', 'step': 16218, 'epoch': 3} {'type': 'loss', 'content': 0.0772278681397438, 'timestamp': '2025-09-30 22:32:10.432304', 'step': 16219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:10.465812', 'step': 16219, 'epoch': 3} {'type': 'loss', 'content': 0.05366535112261772, 'timestamp': '2025-09-30 22:32:10.492507', 'step': 16220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.524953', 'step': 16220, 'epoch': 3} {'type': 'loss', 'content': 0.13798420131206512, 'timestamp': '2025-09-30 22:32:10.527805', 'step': 16221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.559079', 'step': 16221, 'epoch': 3} {'type': 'loss', 'content': 0.09676025807857513, 'timestamp': '2025-09-30 22:32:10.562483', 'step': 16222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:10.593310', 'step': 16222, 'epoch': 3} {'type': 'loss', 'content': 0.10351253300905228, 'timestamp': '2025-09-30 22:32:10.596133', 'step': 16223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:10.627713', 'step': 16223, 'epoch': 3} {'type': 'loss', 'content': 0.09831460565328598, 'timestamp': '2025-09-30 22:32:10.657230', 'step': 16224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:10.690939', 'step': 16224, 'epoch': 3} {'type': 'loss', 'content': 0.07904558628797531, 'timestamp': '2025-09-30 22:32:10.696993', 'step': 16225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:10.729861', 'step': 16225, 'epoch': 3} {'type': 'loss', 'content': 0.1030765026807785, 'timestamp': '2025-09-30 22:32:10.734355', 'step': 16226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.769177', 'step': 16226, 'epoch': 3} {'type': 'loss', 'content': 0.15146906673908234, 'timestamp': '2025-09-30 22:32:10.773486', 'step': 16227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:10.814104', 'step': 16227, 'epoch': 3} {'type': 'loss', 'content': 0.10004357248544693, 'timestamp': '2025-09-30 22:32:10.839244', 'step': 16228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:10.888104', 'step': 16228, 'epoch': 3} {'type': 'loss', 'content': 0.04604411870241165, 'timestamp': '2025-09-30 22:32:10.894868', 'step': 16229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:10.928795', 'step': 16229, 'epoch': 3} {'type': 'loss', 'content': 0.05481942743062973, 'timestamp': '2025-09-30 22:32:10.933474', 'step': 16230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:10.966620', 'step': 16230, 'epoch': 3} {'type': 'loss', 'content': 0.06654300540685654, 'timestamp': '2025-09-30 22:32:10.979286', 'step': 16231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:11.022053', 'step': 16231, 'epoch': 3} {'type': 'loss', 'content': 0.0482945516705513, 'timestamp': '2025-09-30 22:32:11.054082', 'step': 16232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.090098', 'step': 16232, 'epoch': 3} {'type': 'loss', 'content': 0.11287549138069153, 'timestamp': '2025-09-30 22:32:11.093422', 'step': 16233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:11.124569', 'step': 16233, 'epoch': 3} {'type': 'loss', 'content': 0.07528281956911087, 'timestamp': '2025-09-30 22:32:11.129008', 'step': 16234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:11.159871', 'step': 16234, 'epoch': 3} {'type': 'loss', 'content': 0.027895381674170494, 'timestamp': '2025-09-30 22:32:11.163360', 'step': 16235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.200921', 'step': 16235, 'epoch': 3} {'type': 'loss', 'content': 0.12187763303518295, 'timestamp': '2025-09-30 22:32:11.225635', 'step': 16236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.263706', 'step': 16236, 'epoch': 3} {'type': 'loss', 'content': 0.04631389304995537, 'timestamp': '2025-09-30 22:32:11.266874', 'step': 16237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.309987', 'step': 16237, 'epoch': 3} {'type': 'loss', 'content': 0.05924319475889206, 'timestamp': '2025-09-30 22:32:11.317345', 'step': 16238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:11.354172', 'step': 16238, 'epoch': 3} {'type': 'loss', 'content': 0.03584785386919975, 'timestamp': '2025-09-30 22:32:11.357646', 'step': 16239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.390612', 'step': 16239, 'epoch': 3} {'type': 'loss', 'content': 0.025221196934580803, 'timestamp': '2025-09-30 22:32:11.415987', 'step': 16240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:11.456275', 'step': 16240, 'epoch': 3} {'type': 'loss', 'content': 0.10414502769708633, 'timestamp': '2025-09-30 22:32:11.467608', 'step': 16241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:32:11.516777', 'step': 16241, 'epoch': 3} {'type': 'loss', 'content': 0.044709041714668274, 'timestamp': '2025-09-30 22:32:11.522737', 'step': 16242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.577692', 'step': 16242, 'epoch': 3} {'type': 'loss', 'content': 0.09425249695777893, 'timestamp': '2025-09-30 22:32:11.598522', 'step': 16243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:11.639444', 'step': 16243, 'epoch': 3} {'type': 'loss', 'content': 0.0705191045999527, 'timestamp': '2025-09-30 22:32:11.666871', 'step': 16244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:11.712253', 'step': 16244, 'epoch': 3} {'type': 'loss', 'content': 0.09299293160438538, 'timestamp': '2025-09-30 22:32:11.719377', 'step': 16245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:11.750775', 'step': 16245, 'epoch': 3} {'type': 'loss', 'content': 0.07478571683168411, 'timestamp': '2025-09-30 22:32:11.754810', 'step': 16246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:11.785536', 'step': 16246, 'epoch': 3} {'type': 'loss', 'content': 0.10387326776981354, 'timestamp': '2025-09-30 22:32:11.788434', 'step': 16247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:11.832833', 'step': 16247, 'epoch': 3} {'type': 'loss', 'content': 0.06384234875440598, 'timestamp': '2025-09-30 22:32:11.857798', 'step': 16248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:11.891346', 'step': 16248, 'epoch': 3} {'type': 'loss', 'content': 0.07736387848854065, 'timestamp': '2025-09-30 22:32:11.894551', 'step': 16249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:11.928177', 'step': 16249, 'epoch': 3} {'type': 'loss', 'content': 0.03344951570034027, 'timestamp': '2025-09-30 22:32:11.936238', 'step': 16250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:11.968633', 'step': 16250, 'epoch': 3} {'type': 'loss', 'content': 0.07495731860399246, 'timestamp': '2025-09-30 22:32:11.972758', 'step': 16251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:12.004644', 'step': 16251, 'epoch': 3} {'type': 'loss', 'content': 0.15767508745193481, 'timestamp': '2025-09-30 22:32:12.033321', 'step': 16252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.064099', 'step': 16252, 'epoch': 3} {'type': 'loss', 'content': 0.1031365916132927, 'timestamp': '2025-09-30 22:32:12.077885', 'step': 16253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:12.109906', 'step': 16253, 'epoch': 3} {'type': 'loss', 'content': 0.09504483640193939, 'timestamp': '2025-09-30 22:32:12.129510', 'step': 16254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:12.164517', 'step': 16254, 'epoch': 3} {'type': 'loss', 'content': 0.06287921220064163, 'timestamp': '2025-09-30 22:32:12.172283', 'step': 16255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.208271', 'step': 16255, 'epoch': 3} {'type': 'loss', 'content': 0.03993579372763634, 'timestamp': '2025-09-30 22:32:12.235979', 'step': 16256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:12.268149', 'step': 16256, 'epoch': 3} {'type': 'loss', 'content': 0.04356708005070686, 'timestamp': '2025-09-30 22:32:12.274253', 'step': 16257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:12.307610', 'step': 16257, 'epoch': 3} {'type': 'loss', 'content': 0.026796095073223114, 'timestamp': '2025-09-30 22:32:12.312543', 'step': 16258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:12.353931', 'step': 16258, 'epoch': 3} {'type': 'loss', 'content': 0.09215466678142548, 'timestamp': '2025-09-30 22:32:12.358162', 'step': 16259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:12.402013', 'step': 16259, 'epoch': 3} {'type': 'loss', 'content': 0.03783102706074715, 'timestamp': '2025-09-30 22:32:12.428237', 'step': 16260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.471094', 'step': 16260, 'epoch': 3} {'type': 'loss', 'content': 0.09719304740428925, 'timestamp': '2025-09-30 22:32:12.474250', 'step': 16261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.512009', 'step': 16261, 'epoch': 3} {'type': 'loss', 'content': 0.06680074334144592, 'timestamp': '2025-09-30 22:32:12.515272', 'step': 16262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:12.547606', 'step': 16262, 'epoch': 3} {'type': 'loss', 'content': 0.022898532450199127, 'timestamp': '2025-09-30 22:32:12.559239', 'step': 16263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:12.590012', 'step': 16263, 'epoch': 3} {'type': 'loss', 'content': 0.04379987344145775, 'timestamp': '2025-09-30 22:32:12.621767', 'step': 16264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:12.658538', 'step': 16264, 'epoch': 3} {'type': 'loss', 'content': 0.11031730473041534, 'timestamp': '2025-09-30 22:32:12.662660', 'step': 16265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.694832', 'step': 16265, 'epoch': 3} {'type': 'loss', 'content': 0.01534937135875225, 'timestamp': '2025-09-30 22:32:12.698748', 'step': 16266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:12.735902', 'step': 16266, 'epoch': 3} {'type': 'loss', 'content': 0.09996022284030914, 'timestamp': '2025-09-30 22:32:12.742914', 'step': 16267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.785602', 'step': 16267, 'epoch': 3} {'type': 'loss', 'content': 0.1335635632276535, 'timestamp': '2025-09-30 22:32:12.819587', 'step': 16268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.853173', 'step': 16268, 'epoch': 3} {'type': 'loss', 'content': 0.10317467898130417, 'timestamp': '2025-09-30 22:32:12.856449', 'step': 16269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:12.889787', 'step': 16269, 'epoch': 3} {'type': 'loss', 'content': 0.05480126664042473, 'timestamp': '2025-09-30 22:32:12.893388', 'step': 16270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:12.925990', 'step': 16270, 'epoch': 3} {'type': 'loss', 'content': 0.10091015696525574, 'timestamp': '2025-09-30 22:32:12.930271', 'step': 16271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:12.964604', 'step': 16271, 'epoch': 3} {'type': 'loss', 'content': 0.05367007851600647, 'timestamp': '2025-09-30 22:32:12.990200', 'step': 16272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.034415', 'step': 16272, 'epoch': 3} {'type': 'loss', 'content': 0.06614815443754196, 'timestamp': '2025-09-30 22:32:13.037168', 'step': 16273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.075853', 'step': 16273, 'epoch': 3} {'type': 'loss', 'content': 0.07504637539386749, 'timestamp': '2025-09-30 22:32:13.080782', 'step': 16274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.113818', 'step': 16274, 'epoch': 3} {'type': 'loss', 'content': 0.09811708331108093, 'timestamp': '2025-09-30 22:32:13.118551', 'step': 16275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:13.151797', 'step': 16275, 'epoch': 3} {'type': 'loss', 'content': 0.053696006536483765, 'timestamp': '2025-09-30 22:32:13.184760', 'step': 16276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:13.220627', 'step': 16276, 'epoch': 3} {'type': 'loss', 'content': 0.10461761802434921, 'timestamp': '2025-09-30 22:32:13.223254', 'step': 16277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.259636', 'step': 16277, 'epoch': 3} {'type': 'loss', 'content': 0.1373959332704544, 'timestamp': '2025-09-30 22:32:13.275499', 'step': 16278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:13.306726', 'step': 16278, 'epoch': 3} {'type': 'loss', 'content': 0.15796847641468048, 'timestamp': '2025-09-30 22:32:13.313081', 'step': 16279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:13.346176', 'step': 16279, 'epoch': 3} {'type': 'loss', 'content': 0.09360027313232422, 'timestamp': '2025-09-30 22:32:13.370034', 'step': 16280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.418648', 'step': 16280, 'epoch': 3} {'type': 'loss', 'content': 0.06325278431177139, 'timestamp': '2025-09-30 22:32:13.422829', 'step': 16281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:13.454384', 'step': 16281, 'epoch': 3} {'type': 'loss', 'content': 0.04540092498064041, 'timestamp': '2025-09-30 22:32:13.469668', 'step': 16282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:13.505548', 'step': 16282, 'epoch': 3} {'type': 'loss', 'content': 0.07634139806032181, 'timestamp': '2025-09-30 22:32:13.511054', 'step': 16283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.543167', 'step': 16283, 'epoch': 3} {'type': 'loss', 'content': 0.11871757358312607, 'timestamp': '2025-09-30 22:32:13.567856', 'step': 16284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:13.603608', 'step': 16284, 'epoch': 3} {'type': 'loss', 'content': 0.01560368575155735, 'timestamp': '2025-09-30 22:32:13.608430', 'step': 16285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:13.640357', 'step': 16285, 'epoch': 3} {'type': 'loss', 'content': 0.07989770919084549, 'timestamp': '2025-09-30 22:32:13.650368', 'step': 16286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.687578', 'step': 16286, 'epoch': 3} {'type': 'loss', 'content': 0.08965200930833817, 'timestamp': '2025-09-30 22:32:13.693808', 'step': 16287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:13.726427', 'step': 16287, 'epoch': 3} {'type': 'loss', 'content': 0.054070666432380676, 'timestamp': '2025-09-30 22:32:13.752834', 'step': 16288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.790099', 'step': 16288, 'epoch': 3} {'type': 'loss', 'content': 0.053966693580150604, 'timestamp': '2025-09-30 22:32:13.794126', 'step': 16289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:13.827785', 'step': 16289, 'epoch': 3} {'type': 'loss', 'content': 0.06642130762338638, 'timestamp': '2025-09-30 22:32:13.831090', 'step': 16290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:13.871413', 'step': 16290, 'epoch': 3} {'type': 'loss', 'content': 0.06372493505477905, 'timestamp': '2025-09-30 22:32:13.875789', 'step': 16291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:13.912330', 'step': 16291, 'epoch': 3} {'type': 'loss', 'content': 0.07131864875555038, 'timestamp': '2025-09-30 22:32:13.937158', 'step': 16292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:13.971359', 'step': 16292, 'epoch': 3} {'type': 'loss', 'content': 0.010420049540698528, 'timestamp': '2025-09-30 22:32:13.982032', 'step': 16293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:14.020281', 'step': 16293, 'epoch': 3} {'type': 'loss', 'content': 0.10364027321338654, 'timestamp': '2025-09-30 22:32:14.028334', 'step': 16294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:14.069572', 'step': 16294, 'epoch': 3} {'type': 'loss', 'content': 0.03854422643780708, 'timestamp': '2025-09-30 22:32:14.086910', 'step': 16295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.123047', 'step': 16295, 'epoch': 3} {'type': 'loss', 'content': 0.08523046225309372, 'timestamp': '2025-09-30 22:32:14.147478', 'step': 16296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:14.188450', 'step': 16296, 'epoch': 3} {'type': 'loss', 'content': 0.06944183260202408, 'timestamp': '2025-09-30 22:32:14.193838', 'step': 16297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.238313', 'step': 16297, 'epoch': 3} {'type': 'loss', 'content': 0.13704709708690643, 'timestamp': '2025-09-30 22:32:14.244642', 'step': 16298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:14.278592', 'step': 16298, 'epoch': 3} {'type': 'loss', 'content': 0.10205541551113129, 'timestamp': '2025-09-30 22:32:14.295958', 'step': 16299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.330722', 'step': 16299, 'epoch': 3} {'type': 'loss', 'content': 0.031313374638557434, 'timestamp': '2025-09-30 22:32:14.361305', 'step': 16300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:14.394408', 'step': 16300, 'epoch': 3} {'type': 'loss', 'content': 0.0660446286201477, 'timestamp': '2025-09-30 22:32:14.404397', 'step': 16301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:14.441519', 'step': 16301, 'epoch': 3} {'type': 'loss', 'content': 0.06580434739589691, 'timestamp': '2025-09-30 22:32:14.443923', 'step': 16302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.475058', 'step': 16302, 'epoch': 3} {'type': 'loss', 'content': 0.06358936429023743, 'timestamp': '2025-09-30 22:32:14.485818', 'step': 16303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.533405', 'step': 16303, 'epoch': 3} {'type': 'loss', 'content': 0.019599657505750656, 'timestamp': '2025-09-30 22:32:14.563802', 'step': 16304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:14.600430', 'step': 16304, 'epoch': 3} {'type': 'loss', 'content': 0.07662089914083481, 'timestamp': '2025-09-30 22:32:14.604994', 'step': 16305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:14.646743', 'step': 16305, 'epoch': 3} {'type': 'loss', 'content': 0.14146539568901062, 'timestamp': '2025-09-30 22:32:14.652878', 'step': 16306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:14.686450', 'step': 16306, 'epoch': 3} {'type': 'loss', 'content': 0.04478134959936142, 'timestamp': '2025-09-30 22:32:14.690938', 'step': 16307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:14.733876', 'step': 16307, 'epoch': 3} {'type': 'loss', 'content': 0.08956430107355118, 'timestamp': '2025-09-30 22:32:14.763455', 'step': 16308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.797175', 'step': 16308, 'epoch': 3} {'type': 'loss', 'content': 0.07828308641910553, 'timestamp': '2025-09-30 22:32:14.802300', 'step': 16309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:14.835664', 'step': 16309, 'epoch': 3} {'type': 'loss', 'content': 0.07650519162416458, 'timestamp': '2025-09-30 22:32:14.852864', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:32:23.408160', 'step': 16310, 'epoch': 3} {'type': 'pplx', 'content': 9772.30787511467, 'timestamp': '2025-09-30 22:32:23.412401', 'step': 16310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:23.474896', 'step': 16310, 'epoch': 3} {'type': 'loss', 'content': 0.07998206466436386, 'timestamp': '2025-09-30 22:32:23.478533', 'step': 16311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:23.519011', 'step': 16311, 'epoch': 3} {'type': 'loss', 'content': 0.023756643757224083, 'timestamp': '2025-09-30 22:32:23.544427', 'step': 16312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:23.584708', 'step': 16312, 'epoch': 3} {'type': 'loss', 'content': 0.0513312965631485, 'timestamp': '2025-09-30 22:32:23.593364', 'step': 16313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:23.631007', 'step': 16313, 'epoch': 3} {'type': 'loss', 'content': 0.08161334693431854, 'timestamp': '2025-09-30 22:32:23.633826', 'step': 16314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:23.692453', 'step': 16314, 'epoch': 3} {'type': 'loss', 'content': 0.06223728135228157, 'timestamp': '2025-09-30 22:32:23.702489', 'step': 16315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:23.748954', 'step': 16315, 'epoch': 3} {'type': 'loss', 'content': 0.147720068693161, 'timestamp': '2025-09-30 22:32:23.778825', 'step': 16316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:23.822612', 'step': 16316, 'epoch': 3} {'type': 'loss', 'content': 0.08559049665927887, 'timestamp': '2025-09-30 22:32:23.828881', 'step': 16317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:23.864473', 'step': 16317, 'epoch': 3} {'type': 'loss', 'content': 0.06030495464801788, 'timestamp': '2025-09-30 22:32:23.868345', 'step': 16318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:23.914376', 'step': 16318, 'epoch': 3} {'type': 'loss', 'content': 0.06050096079707146, 'timestamp': '2025-09-30 22:32:23.917125', 'step': 16319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:23.948319', 'step': 16319, 'epoch': 3} {'type': 'loss', 'content': 0.09045658260583878, 'timestamp': '2025-09-30 22:32:23.974246', 'step': 16320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:24.007881', 'step': 16320, 'epoch': 3} {'type': 'loss', 'content': 0.07348237931728363, 'timestamp': '2025-09-30 22:32:24.012103', 'step': 16321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.042930', 'step': 16321, 'epoch': 3} {'type': 'loss', 'content': 0.05582336708903313, 'timestamp': '2025-09-30 22:32:24.050042', 'step': 16322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:24.087501', 'step': 16322, 'epoch': 3} {'type': 'loss', 'content': 0.031636521220207214, 'timestamp': '2025-09-30 22:32:24.091227', 'step': 16323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.122499', 'step': 16323, 'epoch': 3} {'type': 'loss', 'content': 0.06713788211345673, 'timestamp': '2025-09-30 22:32:24.149715', 'step': 16324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.186696', 'step': 16324, 'epoch': 3} {'type': 'loss', 'content': 0.05891304835677147, 'timestamp': '2025-09-30 22:32:24.193365', 'step': 16325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:24.228832', 'step': 16325, 'epoch': 3} {'type': 'loss', 'content': 0.054879240691661835, 'timestamp': '2025-09-30 22:32:24.232052', 'step': 16326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:24.262784', 'step': 16326, 'epoch': 3} {'type': 'loss', 'content': 0.07324786484241486, 'timestamp': '2025-09-30 22:32:24.267992', 'step': 16327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.300093', 'step': 16327, 'epoch': 3} {'type': 'loss', 'content': 0.126107856631279, 'timestamp': '2025-09-30 22:32:24.324256', 'step': 16328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.355950', 'step': 16328, 'epoch': 3} {'type': 'loss', 'content': 0.039511922746896744, 'timestamp': '2025-09-30 22:32:24.358851', 'step': 16329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.391859', 'step': 16329, 'epoch': 3} {'type': 'loss', 'content': 0.04533812403678894, 'timestamp': '2025-09-30 22:32:24.396044', 'step': 16330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.429753', 'step': 16330, 'epoch': 3} {'type': 'loss', 'content': 0.027681024745106697, 'timestamp': '2025-09-30 22:32:24.433173', 'step': 16331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.467894', 'step': 16331, 'epoch': 3} {'type': 'loss', 'content': 0.060289375483989716, 'timestamp': '2025-09-30 22:32:24.503596', 'step': 16332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.538061', 'step': 16332, 'epoch': 3} {'type': 'loss', 'content': 0.10194116830825806, 'timestamp': '2025-09-30 22:32:24.541810', 'step': 16333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:24.578508', 'step': 16333, 'epoch': 3} {'type': 'loss', 'content': 0.0686626136302948, 'timestamp': '2025-09-30 22:32:24.585833', 'step': 16334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.620939', 'step': 16334, 'epoch': 3} {'type': 'loss', 'content': 0.10778337717056274, 'timestamp': '2025-09-30 22:32:24.632111', 'step': 16335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.672474', 'step': 16335, 'epoch': 3} {'type': 'loss', 'content': 0.07249946147203445, 'timestamp': '2025-09-30 22:32:24.696959', 'step': 16336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.727053', 'step': 16336, 'epoch': 3} {'type': 'loss', 'content': 0.15930651128292084, 'timestamp': '2025-09-30 22:32:24.731190', 'step': 16337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:24.764196', 'step': 16337, 'epoch': 3} {'type': 'loss', 'content': 0.08823766559362411, 'timestamp': '2025-09-30 22:32:24.769640', 'step': 16338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:24.802714', 'step': 16338, 'epoch': 3} {'type': 'loss', 'content': 0.10240616649389267, 'timestamp': '2025-09-30 22:32:24.805572', 'step': 16339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:24.836464', 'step': 16339, 'epoch': 3} {'type': 'loss', 'content': 0.16584143042564392, 'timestamp': '2025-09-30 22:32:24.861760', 'step': 16340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:24.893424', 'step': 16340, 'epoch': 3} {'type': 'loss', 'content': 0.024334343150258064, 'timestamp': '2025-09-30 22:32:24.897630', 'step': 16341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:24.938991', 'step': 16341, 'epoch': 3} {'type': 'loss', 'content': 0.1570945680141449, 'timestamp': '2025-09-30 22:32:24.943890', 'step': 16342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:24.974479', 'step': 16342, 'epoch': 3} {'type': 'loss', 'content': 0.04117691144347191, 'timestamp': '2025-09-30 22:32:24.976713', 'step': 16343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.007733', 'step': 16343, 'epoch': 3} {'type': 'loss', 'content': 0.11707913875579834, 'timestamp': '2025-09-30 22:32:25.042531', 'step': 16344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.073015', 'step': 16344, 'epoch': 3} {'type': 'loss', 'content': 0.09359339624643326, 'timestamp': '2025-09-30 22:32:25.079773', 'step': 16345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:25.110464', 'step': 16345, 'epoch': 3} {'type': 'loss', 'content': 0.13432088494300842, 'timestamp': '2025-09-30 22:32:25.112825', 'step': 16346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:25.147479', 'step': 16346, 'epoch': 3} {'type': 'loss', 'content': 0.05787273496389389, 'timestamp': '2025-09-30 22:32:25.150721', 'step': 16347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.181719', 'step': 16347, 'epoch': 3} {'type': 'loss', 'content': 0.06673529744148254, 'timestamp': '2025-09-30 22:32:25.205996', 'step': 16348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.236985', 'step': 16348, 'epoch': 3} {'type': 'loss', 'content': 0.01923881471157074, 'timestamp': '2025-09-30 22:32:25.240811', 'step': 16349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:25.274422', 'step': 16349, 'epoch': 3} {'type': 'loss', 'content': 0.08084359765052795, 'timestamp': '2025-09-30 22:32:25.277714', 'step': 16350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.309542', 'step': 16350, 'epoch': 3} {'type': 'loss', 'content': 0.09008152037858963, 'timestamp': '2025-09-30 22:32:25.313059', 'step': 16351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:25.348206', 'step': 16351, 'epoch': 3} {'type': 'loss', 'content': 0.05513770133256912, 'timestamp': '2025-09-30 22:32:25.380856', 'step': 16352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.413164', 'step': 16352, 'epoch': 3} {'type': 'loss', 'content': 0.061329904943704605, 'timestamp': '2025-09-30 22:32:25.415880', 'step': 16353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:25.446286', 'step': 16353, 'epoch': 3} {'type': 'loss', 'content': 0.10211661458015442, 'timestamp': '2025-09-30 22:32:25.462722', 'step': 16354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.505912', 'step': 16354, 'epoch': 3} {'type': 'loss', 'content': 0.05179460719227791, 'timestamp': '2025-09-30 22:32:25.509562', 'step': 16355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.540509', 'step': 16355, 'epoch': 3} {'type': 'loss', 'content': 0.04231581836938858, 'timestamp': '2025-09-30 22:32:25.572567', 'step': 16356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.602853', 'step': 16356, 'epoch': 3} {'type': 'loss', 'content': 0.06344106048345566, 'timestamp': '2025-09-30 22:32:25.606974', 'step': 16357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.640058', 'step': 16357, 'epoch': 3} {'type': 'loss', 'content': 0.06364163011312485, 'timestamp': '2025-09-30 22:32:25.647127', 'step': 16358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:25.678623', 'step': 16358, 'epoch': 3} {'type': 'loss', 'content': 0.049614012241363525, 'timestamp': '2025-09-30 22:32:25.684005', 'step': 16359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:25.714789', 'step': 16359, 'epoch': 3} {'type': 'loss', 'content': 0.08637918531894684, 'timestamp': '2025-09-30 22:32:25.739600', 'step': 16360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.770701', 'step': 16360, 'epoch': 3} {'type': 'loss', 'content': 0.054955918341875076, 'timestamp': '2025-09-30 22:32:25.785292', 'step': 16361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.822010', 'step': 16361, 'epoch': 3} {'type': 'loss', 'content': 0.11952755600214005, 'timestamp': '2025-09-30 22:32:25.825352', 'step': 16362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.867515', 'step': 16362, 'epoch': 3} {'type': 'loss', 'content': 0.08539410680532455, 'timestamp': '2025-09-30 22:32:25.872521', 'step': 16363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:25.906506', 'step': 16363, 'epoch': 3} {'type': 'loss', 'content': 0.13197512924671173, 'timestamp': '2025-09-30 22:32:25.930836', 'step': 16364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:25.964763', 'step': 16364, 'epoch': 3} {'type': 'loss', 'content': 0.17959557473659515, 'timestamp': '2025-09-30 22:32:25.967733', 'step': 16365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:26.003830', 'step': 16365, 'epoch': 3} {'type': 'loss', 'content': 0.023774566128849983, 'timestamp': '2025-09-30 22:32:26.008019', 'step': 16366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:26.041713', 'step': 16366, 'epoch': 3} {'type': 'loss', 'content': 0.07573188096284866, 'timestamp': '2025-09-30 22:32:26.045771', 'step': 16367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.078318', 'step': 16367, 'epoch': 3} {'type': 'loss', 'content': 0.09578771144151688, 'timestamp': '2025-09-30 22:32:26.103784', 'step': 16368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.135372', 'step': 16368, 'epoch': 3} {'type': 'loss', 'content': 0.041025370359420776, 'timestamp': '2025-09-30 22:32:26.140546', 'step': 16369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:26.173626', 'step': 16369, 'epoch': 3} {'type': 'loss', 'content': 0.10334772616624832, 'timestamp': '2025-09-30 22:32:26.176543', 'step': 16370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.210679', 'step': 16370, 'epoch': 3} {'type': 'loss', 'content': 0.11274757236242294, 'timestamp': '2025-09-30 22:32:26.215125', 'step': 16371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.246190', 'step': 16371, 'epoch': 3} {'type': 'loss', 'content': 0.07467690855264664, 'timestamp': '2025-09-30 22:32:26.271405', 'step': 16372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.315986', 'step': 16372, 'epoch': 3} {'type': 'loss', 'content': 0.1055285632610321, 'timestamp': '2025-09-30 22:32:26.321791', 'step': 16373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.357086', 'step': 16373, 'epoch': 3} {'type': 'loss', 'content': 0.017913777381181717, 'timestamp': '2025-09-30 22:32:26.360136', 'step': 16374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.393406', 'step': 16374, 'epoch': 3} {'type': 'loss', 'content': 0.11598187685012817, 'timestamp': '2025-09-30 22:32:26.396792', 'step': 16375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.432122', 'step': 16375, 'epoch': 3} {'type': 'loss', 'content': 0.026114683598279953, 'timestamp': '2025-09-30 22:32:26.457703', 'step': 16376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.500825', 'step': 16376, 'epoch': 3} {'type': 'loss', 'content': 0.013178141787648201, 'timestamp': '2025-09-30 22:32:26.506238', 'step': 16377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.538974', 'step': 16377, 'epoch': 3} {'type': 'loss', 'content': 0.030497867614030838, 'timestamp': '2025-09-30 22:32:26.542123', 'step': 16378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.572325', 'step': 16378, 'epoch': 3} {'type': 'loss', 'content': 0.06094939634203911, 'timestamp': '2025-09-30 22:32:26.575452', 'step': 16379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.611184', 'step': 16379, 'epoch': 3} {'type': 'loss', 'content': 0.060638707131147385, 'timestamp': '2025-09-30 22:32:26.636454', 'step': 16380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:26.667662', 'step': 16380, 'epoch': 3} {'type': 'loss', 'content': 0.05393877625465393, 'timestamp': '2025-09-30 22:32:26.680297', 'step': 16381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.718195', 'step': 16381, 'epoch': 3} {'type': 'loss', 'content': 0.08263243734836578, 'timestamp': '2025-09-30 22:32:26.722429', 'step': 16382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.756805', 'step': 16382, 'epoch': 3} {'type': 'loss', 'content': 0.1164470762014389, 'timestamp': '2025-09-30 22:32:26.764334', 'step': 16383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.804047', 'step': 16383, 'epoch': 3} {'type': 'loss', 'content': 0.10960210859775543, 'timestamp': '2025-09-30 22:32:26.829390', 'step': 16384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:26.859912', 'step': 16384, 'epoch': 3} {'type': 'loss', 'content': 0.08274572342634201, 'timestamp': '2025-09-30 22:32:26.863957', 'step': 16385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.895668', 'step': 16385, 'epoch': 3} {'type': 'loss', 'content': 0.06934181600809097, 'timestamp': '2025-09-30 22:32:26.898953', 'step': 16386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:26.939675', 'step': 16386, 'epoch': 3} {'type': 'loss', 'content': 0.0678032785654068, 'timestamp': '2025-09-30 22:32:26.943672', 'step': 16387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:26.975708', 'step': 16387, 'epoch': 3} {'type': 'loss', 'content': 0.17984645068645477, 'timestamp': '2025-09-30 22:32:27.001964', 'step': 16388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:27.034449', 'step': 16388, 'epoch': 3} {'type': 'loss', 'content': 0.1709211766719818, 'timestamp': '2025-09-30 22:32:27.037109', 'step': 16389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:27.067265', 'step': 16389, 'epoch': 3} {'type': 'loss', 'content': 0.08379768580198288, 'timestamp': '2025-09-30 22:32:27.071405', 'step': 16390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.109638', 'step': 16390, 'epoch': 3} {'type': 'loss', 'content': 0.050986696034669876, 'timestamp': '2025-09-30 22:32:27.113745', 'step': 16391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.147163', 'step': 16391, 'epoch': 3} {'type': 'loss', 'content': 0.04234150052070618, 'timestamp': '2025-09-30 22:32:27.174839', 'step': 16392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:27.213695', 'step': 16392, 'epoch': 3} {'type': 'loss', 'content': 0.032693199813365936, 'timestamp': '2025-09-30 22:32:27.216483', 'step': 16393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.247600', 'step': 16393, 'epoch': 3} {'type': 'loss', 'content': 0.06252416223287582, 'timestamp': '2025-09-30 22:32:27.260370', 'step': 16394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.294290', 'step': 16394, 'epoch': 3} {'type': 'loss', 'content': 0.05712072551250458, 'timestamp': '2025-09-30 22:32:27.312438', 'step': 16395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:27.343171', 'step': 16395, 'epoch': 3} {'type': 'loss', 'content': 0.0809580609202385, 'timestamp': '2025-09-30 22:32:27.370515', 'step': 16396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.402813', 'step': 16396, 'epoch': 3} {'type': 'loss', 'content': 0.11910955607891083, 'timestamp': '2025-09-30 22:32:27.406989', 'step': 16397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.438364', 'step': 16397, 'epoch': 3} {'type': 'loss', 'content': 0.030514094978570938, 'timestamp': '2025-09-30 22:32:27.442557', 'step': 16398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:27.475276', 'step': 16398, 'epoch': 3} {'type': 'loss', 'content': 0.09550256282091141, 'timestamp': '2025-09-30 22:32:27.480346', 'step': 16399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.520450', 'step': 16399, 'epoch': 3} {'type': 'loss', 'content': 0.06569226831197739, 'timestamp': '2025-09-30 22:32:27.547117', 'step': 16400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.579325', 'step': 16400, 'epoch': 3} {'type': 'loss', 'content': 0.06738749146461487, 'timestamp': '2025-09-30 22:32:27.583708', 'step': 16401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.615901', 'step': 16401, 'epoch': 3} {'type': 'loss', 'content': 0.06657076627016068, 'timestamp': '2025-09-30 22:32:27.619773', 'step': 16402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.653928', 'step': 16402, 'epoch': 3} {'type': 'loss', 'content': 0.05155330151319504, 'timestamp': '2025-09-30 22:32:27.658146', 'step': 16403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.687912', 'step': 16403, 'epoch': 3} {'type': 'loss', 'content': 0.04459698870778084, 'timestamp': '2025-09-30 22:32:27.714726', 'step': 16404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.746540', 'step': 16404, 'epoch': 3} {'type': 'loss', 'content': 0.04778330773115158, 'timestamp': '2025-09-30 22:32:27.748955', 'step': 16405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:27.780618', 'step': 16405, 'epoch': 3} {'type': 'loss', 'content': 0.04249320179224014, 'timestamp': '2025-09-30 22:32:27.784068', 'step': 16406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.816423', 'step': 16406, 'epoch': 3} {'type': 'loss', 'content': 0.057494159787893295, 'timestamp': '2025-09-30 22:32:27.820053', 'step': 16407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.853184', 'step': 16407, 'epoch': 3} {'type': 'loss', 'content': 0.08100544661283493, 'timestamp': '2025-09-30 22:32:27.879963', 'step': 16408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:27.922346', 'step': 16408, 'epoch': 3} {'type': 'loss', 'content': 0.08527418226003647, 'timestamp': '2025-09-30 22:32:27.925391', 'step': 16409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.957827', 'step': 16409, 'epoch': 3} {'type': 'loss', 'content': 0.0964323878288269, 'timestamp': '2025-09-30 22:32:27.960870', 'step': 16410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:27.992536', 'step': 16410, 'epoch': 3} {'type': 'loss', 'content': 0.08298730850219727, 'timestamp': '2025-09-30 22:32:27.996812', 'step': 16411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.027383', 'step': 16411, 'epoch': 3} {'type': 'loss', 'content': 0.037746086716651917, 'timestamp': '2025-09-30 22:32:28.053240', 'step': 16412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.086338', 'step': 16412, 'epoch': 3} {'type': 'loss', 'content': 0.050931621342897415, 'timestamp': '2025-09-30 22:32:28.092554', 'step': 16413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.135630', 'step': 16413, 'epoch': 3} {'type': 'loss', 'content': 0.04546563699841499, 'timestamp': '2025-09-30 22:32:28.138182', 'step': 16414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:28.169838', 'step': 16414, 'epoch': 3} {'type': 'loss', 'content': 0.061832886189222336, 'timestamp': '2025-09-30 22:32:28.175928', 'step': 16415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.217716', 'step': 16415, 'epoch': 3} {'type': 'loss', 'content': 0.12122414261102676, 'timestamp': '2025-09-30 22:32:28.241665', 'step': 16416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.272853', 'step': 16416, 'epoch': 3} {'type': 'loss', 'content': 0.053908515721559525, 'timestamp': '2025-09-30 22:32:28.275366', 'step': 16417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:28.305958', 'step': 16417, 'epoch': 3} {'type': 'loss', 'content': 0.07352228462696075, 'timestamp': '2025-09-30 22:32:28.308812', 'step': 16418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:28.339655', 'step': 16418, 'epoch': 3} {'type': 'loss', 'content': 0.02223590947687626, 'timestamp': '2025-09-30 22:32:28.342437', 'step': 16419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.373167', 'step': 16419, 'epoch': 3} {'type': 'loss', 'content': 0.10317175835371017, 'timestamp': '2025-09-30 22:32:28.398087', 'step': 16420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.429431', 'step': 16420, 'epoch': 3} {'type': 'loss', 'content': 0.11593201756477356, 'timestamp': '2025-09-30 22:32:28.433558', 'step': 16421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.468543', 'step': 16421, 'epoch': 3} {'type': 'loss', 'content': 0.10318855196237564, 'timestamp': '2025-09-30 22:32:28.473088', 'step': 16422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.506613', 'step': 16422, 'epoch': 3} {'type': 'loss', 'content': 0.08070597052574158, 'timestamp': '2025-09-30 22:32:28.508873', 'step': 16423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.544349', 'step': 16423, 'epoch': 3} {'type': 'loss', 'content': 0.0838978961110115, 'timestamp': '2025-09-30 22:32:28.570003', 'step': 16424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.603956', 'step': 16424, 'epoch': 3} {'type': 'loss', 'content': 0.09392241388559341, 'timestamp': '2025-09-30 22:32:28.607120', 'step': 16425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.650926', 'step': 16425, 'epoch': 3} {'type': 'loss', 'content': 0.10072076320648193, 'timestamp': '2025-09-30 22:32:28.654837', 'step': 16426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.686926', 'step': 16426, 'epoch': 3} {'type': 'loss', 'content': 0.01294198539108038, 'timestamp': '2025-09-30 22:32:28.691287', 'step': 16427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.731011', 'step': 16427, 'epoch': 3} {'type': 'loss', 'content': 0.03756196051836014, 'timestamp': '2025-09-30 22:32:28.755496', 'step': 16428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:28.786164', 'step': 16428, 'epoch': 3} {'type': 'loss', 'content': 0.07822732627391815, 'timestamp': '2025-09-30 22:32:28.789652', 'step': 16429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:28.821306', 'step': 16429, 'epoch': 3} {'type': 'loss', 'content': 0.06842250376939774, 'timestamp': '2025-09-30 22:32:28.824499', 'step': 16430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:32:28.855635', 'step': 16430, 'epoch': 3} {'type': 'loss', 'content': 0.056833136826753616, 'timestamp': '2025-09-30 22:32:28.859933', 'step': 16431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.892168', 'step': 16431, 'epoch': 3} {'type': 'loss', 'content': 0.06586924940347672, 'timestamp': '2025-09-30 22:32:28.916514', 'step': 16432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.948598', 'step': 16432, 'epoch': 3} {'type': 'loss', 'content': 0.08532121777534485, 'timestamp': '2025-09-30 22:32:28.952451', 'step': 16433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:28.983277', 'step': 16433, 'epoch': 3} {'type': 'loss', 'content': 0.11608455330133438, 'timestamp': '2025-09-30 22:32:28.986996', 'step': 16434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:29.018620', 'step': 16434, 'epoch': 3} {'type': 'loss', 'content': 0.05154209956526756, 'timestamp': '2025-09-30 22:32:29.026665', 'step': 16435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.057194', 'step': 16435, 'epoch': 3} {'type': 'loss', 'content': 0.06489919871091843, 'timestamp': '2025-09-30 22:32:29.083615', 'step': 16436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.114818', 'step': 16436, 'epoch': 3} {'type': 'loss', 'content': 0.06489922851324081, 'timestamp': '2025-09-30 22:32:29.118186', 'step': 16437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.149407', 'step': 16437, 'epoch': 3} {'type': 'loss', 'content': 0.0410623662173748, 'timestamp': '2025-09-30 22:32:29.158124', 'step': 16438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:29.194654', 'step': 16438, 'epoch': 3} {'type': 'loss', 'content': 0.10472515970468521, 'timestamp': '2025-09-30 22:32:29.198057', 'step': 16439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.229508', 'step': 16439, 'epoch': 3} {'type': 'loss', 'content': 0.017089661210775375, 'timestamp': '2025-09-30 22:32:29.256238', 'step': 16440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.297454', 'step': 16440, 'epoch': 3} {'type': 'loss', 'content': 0.017295509576797485, 'timestamp': '2025-09-30 22:32:29.301120', 'step': 16441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:29.331113', 'step': 16441, 'epoch': 3} {'type': 'loss', 'content': 0.04094746708869934, 'timestamp': '2025-09-30 22:32:29.334783', 'step': 16442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:29.367397', 'step': 16442, 'epoch': 3} {'type': 'loss', 'content': 0.04848838970065117, 'timestamp': '2025-09-30 22:32:29.371567', 'step': 16443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:29.405479', 'step': 16443, 'epoch': 3} {'type': 'loss', 'content': 0.04956798627972603, 'timestamp': '2025-09-30 22:32:29.430718', 'step': 16444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:29.461302', 'step': 16444, 'epoch': 3} {'type': 'loss', 'content': 0.10864030569791794, 'timestamp': '2025-09-30 22:32:29.464332', 'step': 16445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:29.498131', 'step': 16445, 'epoch': 3} {'type': 'loss', 'content': 0.07820280641317368, 'timestamp': '2025-09-30 22:32:29.501965', 'step': 16446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.536946', 'step': 16446, 'epoch': 3} {'type': 'loss', 'content': 0.07743455469608307, 'timestamp': '2025-09-30 22:32:29.539614', 'step': 16447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:29.570523', 'step': 16447, 'epoch': 3} {'type': 'loss', 'content': 0.08833475410938263, 'timestamp': '2025-09-30 22:32:29.594451', 'step': 16448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.626140', 'step': 16448, 'epoch': 3} {'type': 'loss', 'content': 0.05597833916544914, 'timestamp': '2025-09-30 22:32:29.630104', 'step': 16449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:29.662509', 'step': 16449, 'epoch': 3} {'type': 'loss', 'content': 0.05012357980012894, 'timestamp': '2025-09-30 22:32:29.665736', 'step': 16450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:29.698459', 'step': 16450, 'epoch': 3} {'type': 'loss', 'content': 0.052152495831251144, 'timestamp': '2025-09-30 22:32:29.701233', 'step': 16451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:29.735538', 'step': 16451, 'epoch': 3} {'type': 'loss', 'content': 0.060304298996925354, 'timestamp': '2025-09-30 22:32:29.759731', 'step': 16452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:29.790223', 'step': 16452, 'epoch': 3} {'type': 'loss', 'content': 0.04084724187850952, 'timestamp': '2025-09-30 22:32:29.794286', 'step': 16453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:29.824974', 'step': 16453, 'epoch': 3} {'type': 'loss', 'content': 0.044774819165468216, 'timestamp': '2025-09-30 22:32:29.828125', 'step': 16454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.858744', 'step': 16454, 'epoch': 3} {'type': 'loss', 'content': 0.09737399965524673, 'timestamp': '2025-09-30 22:32:29.865773', 'step': 16455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:29.898564', 'step': 16455, 'epoch': 3} {'type': 'loss', 'content': 0.030936945229768753, 'timestamp': '2025-09-30 22:32:29.923923', 'step': 16456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:29.955156', 'step': 16456, 'epoch': 3} {'type': 'loss', 'content': 0.013623622246086597, 'timestamp': '2025-09-30 22:32:29.961592', 'step': 16457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:29.998121', 'step': 16457, 'epoch': 3} {'type': 'loss', 'content': 0.0975547805428505, 'timestamp': '2025-09-30 22:32:30.001319', 'step': 16458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.032644', 'step': 16458, 'epoch': 3} {'type': 'loss', 'content': 0.0398578979074955, 'timestamp': '2025-09-30 22:32:30.035053', 'step': 16459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:30.069886', 'step': 16459, 'epoch': 3} {'type': 'loss', 'content': 0.02850879728794098, 'timestamp': '2025-09-30 22:32:30.094519', 'step': 16460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.126578', 'step': 16460, 'epoch': 3} {'type': 'loss', 'content': 0.1028660386800766, 'timestamp': '2025-09-30 22:32:30.130151', 'step': 16461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:30.161343', 'step': 16461, 'epoch': 3} {'type': 'loss', 'content': 0.05220577493309975, 'timestamp': '2025-09-30 22:32:30.165434', 'step': 16462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.199166', 'step': 16462, 'epoch': 3} {'type': 'loss', 'content': 0.03938940167427063, 'timestamp': '2025-09-30 22:32:30.203350', 'step': 16463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.241689', 'step': 16463, 'epoch': 3} {'type': 'loss', 'content': 0.07411430776119232, 'timestamp': '2025-09-30 22:32:30.273965', 'step': 16464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.305005', 'step': 16464, 'epoch': 3} {'type': 'loss', 'content': 0.09169698506593704, 'timestamp': '2025-09-30 22:32:30.311095', 'step': 16465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.351536', 'step': 16465, 'epoch': 3} {'type': 'loss', 'content': 0.030225630849599838, 'timestamp': '2025-09-30 22:32:30.362669', 'step': 16466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:30.394754', 'step': 16466, 'epoch': 3} {'type': 'loss', 'content': 0.05722043663263321, 'timestamp': '2025-09-30 22:32:30.403535', 'step': 16467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:30.434441', 'step': 16467, 'epoch': 3} {'type': 'loss', 'content': 0.050700798630714417, 'timestamp': '2025-09-30 22:32:30.461823', 'step': 16468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:30.493755', 'step': 16468, 'epoch': 3} {'type': 'loss', 'content': 0.08534978330135345, 'timestamp': '2025-09-30 22:32:30.496053', 'step': 16469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:30.526780', 'step': 16469, 'epoch': 3} {'type': 'loss', 'content': 0.0595112182199955, 'timestamp': '2025-09-30 22:32:30.532420', 'step': 16470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:30.565503', 'step': 16470, 'epoch': 3} {'type': 'loss', 'content': 0.059901509433984756, 'timestamp': '2025-09-30 22:32:30.572338', 'step': 16471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.606868', 'step': 16471, 'epoch': 3} {'type': 'loss', 'content': 0.026369983330368996, 'timestamp': '2025-09-30 22:32:30.631886', 'step': 16472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.663808', 'step': 16472, 'epoch': 3} {'type': 'loss', 'content': 0.08893471956253052, 'timestamp': '2025-09-30 22:32:30.675518', 'step': 16473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:30.706915', 'step': 16473, 'epoch': 3} {'type': 'loss', 'content': 0.07131467014551163, 'timestamp': '2025-09-30 22:32:30.709345', 'step': 16474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.741401', 'step': 16474, 'epoch': 3} {'type': 'loss', 'content': 0.10602406412363052, 'timestamp': '2025-09-30 22:32:30.744854', 'step': 16475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:30.776819', 'step': 16475, 'epoch': 3} {'type': 'loss', 'content': 0.0913824588060379, 'timestamp': '2025-09-30 22:32:30.801795', 'step': 16476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.833529', 'step': 16476, 'epoch': 3} {'type': 'loss', 'content': 0.05259297788143158, 'timestamp': '2025-09-30 22:32:30.837426', 'step': 16477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.873740', 'step': 16477, 'epoch': 3} {'type': 'loss', 'content': 0.03547072038054466, 'timestamp': '2025-09-30 22:32:30.878570', 'step': 16478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:30.910592', 'step': 16478, 'epoch': 3} {'type': 'loss', 'content': 0.04901067912578583, 'timestamp': '2025-09-30 22:32:30.914875', 'step': 16479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:30.958439', 'step': 16479, 'epoch': 3} {'type': 'loss', 'content': 0.08264084160327911, 'timestamp': '2025-09-30 22:32:30.984954', 'step': 16480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.016196', 'step': 16480, 'epoch': 3} {'type': 'loss', 'content': 0.04174768552184105, 'timestamp': '2025-09-30 22:32:31.018254', 'step': 16481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:31.049093', 'step': 16481, 'epoch': 3} {'type': 'loss', 'content': 0.008230164647102356, 'timestamp': '2025-09-30 22:32:31.054137', 'step': 16482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:31.086550', 'step': 16482, 'epoch': 3} {'type': 'loss', 'content': 0.06436539441347122, 'timestamp': '2025-09-30 22:32:31.089573', 'step': 16483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.120341', 'step': 16483, 'epoch': 3} {'type': 'loss', 'content': 0.023988505825400352, 'timestamp': '2025-09-30 22:32:31.149917', 'step': 16484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:31.180060', 'step': 16484, 'epoch': 3} {'type': 'loss', 'content': 0.058321043848991394, 'timestamp': '2025-09-30 22:32:31.189770', 'step': 16485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:31.229111', 'step': 16485, 'epoch': 3} {'type': 'loss', 'content': 0.06494739651679993, 'timestamp': '2025-09-30 22:32:31.232653', 'step': 16486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:31.264190', 'step': 16486, 'epoch': 3} {'type': 'loss', 'content': 0.033767785876989365, 'timestamp': '2025-09-30 22:32:31.268533', 'step': 16487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.306076', 'step': 16487, 'epoch': 3} {'type': 'loss', 'content': 0.05402529612183571, 'timestamp': '2025-09-30 22:32:31.330698', 'step': 16488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:31.362321', 'step': 16488, 'epoch': 3} {'type': 'loss', 'content': 0.08490277081727982, 'timestamp': '2025-09-30 22:32:31.366006', 'step': 16489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:31.397638', 'step': 16489, 'epoch': 3} {'type': 'loss', 'content': 0.10672301054000854, 'timestamp': '2025-09-30 22:32:31.401486', 'step': 16490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:31.434112', 'step': 16490, 'epoch': 3} {'type': 'loss', 'content': 0.04807401821017265, 'timestamp': '2025-09-30 22:32:31.438059', 'step': 16491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:31.469537', 'step': 16491, 'epoch': 3} {'type': 'loss', 'content': 0.07330591976642609, 'timestamp': '2025-09-30 22:32:31.494664', 'step': 16492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:31.527879', 'step': 16492, 'epoch': 3} {'type': 'loss', 'content': 0.03136062994599342, 'timestamp': '2025-09-30 22:32:31.531792', 'step': 16493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:32:31.566325', 'step': 16493, 'epoch': 3} {'type': 'loss', 'content': 0.05672845616936684, 'timestamp': '2025-09-30 22:32:31.571510', 'step': 16494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.611327', 'step': 16494, 'epoch': 3} {'type': 'loss', 'content': 0.11723129451274872, 'timestamp': '2025-09-30 22:32:31.614576', 'step': 16495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:31.649365', 'step': 16495, 'epoch': 3} {'type': 'loss', 'content': 0.08198747038841248, 'timestamp': '2025-09-30 22:32:31.673564', 'step': 16496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.707944', 'step': 16496, 'epoch': 3} {'type': 'loss', 'content': 0.0658048614859581, 'timestamp': '2025-09-30 22:32:31.711046', 'step': 16497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.744897', 'step': 16497, 'epoch': 3} {'type': 'loss', 'content': 0.09569953382015228, 'timestamp': '2025-09-30 22:32:31.748566', 'step': 16498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.781614', 'step': 16498, 'epoch': 3} {'type': 'loss', 'content': 0.035356201231479645, 'timestamp': '2025-09-30 22:32:31.784463', 'step': 16499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:31.814819', 'step': 16499, 'epoch': 3} {'type': 'loss', 'content': 0.06051013991236687, 'timestamp': '2025-09-30 22:32:31.839775', 'step': 16500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 16500', 'timestamp': '2025-09-30 22:32:37.102505', 'step': 16500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:37.142007', 'step': 16500, 'epoch': 3} {'type': 'loss', 'content': 0.046872787177562714, 'timestamp': '2025-09-30 22:32:37.149574', 'step': 16501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.180143', 'step': 16501, 'epoch': 3} {'type': 'loss', 'content': 0.15558023750782013, 'timestamp': '2025-09-30 22:32:37.182226', 'step': 16502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.214628', 'step': 16502, 'epoch': 3} {'type': 'loss', 'content': 0.07082492113113403, 'timestamp': '2025-09-30 22:32:37.217282', 'step': 16503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.248275', 'step': 16503, 'epoch': 3} {'type': 'loss', 'content': 0.04417119175195694, 'timestamp': '2025-09-30 22:32:37.272705', 'step': 16504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.303945', 'step': 16504, 'epoch': 3} {'type': 'loss', 'content': 0.061276454478502274, 'timestamp': '2025-09-30 22:32:37.306896', 'step': 16505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.337703', 'step': 16505, 'epoch': 3} {'type': 'loss', 'content': 0.07264285534620285, 'timestamp': '2025-09-30 22:32:37.340694', 'step': 16506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:37.371768', 'step': 16506, 'epoch': 3} {'type': 'loss', 'content': 0.09386903792619705, 'timestamp': '2025-09-30 22:32:37.375487', 'step': 16507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.407772', 'step': 16507, 'epoch': 3} {'type': 'loss', 'content': 0.020764825865626335, 'timestamp': '2025-09-30 22:32:37.431839', 'step': 16508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:37.464280', 'step': 16508, 'epoch': 3} {'type': 'loss', 'content': 0.02209429070353508, 'timestamp': '2025-09-30 22:32:37.468961', 'step': 16509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.500043', 'step': 16509, 'epoch': 3} {'type': 'loss', 'content': 0.09649855643510818, 'timestamp': '2025-09-30 22:32:37.503548', 'step': 16510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.544533', 'step': 16510, 'epoch': 3} {'type': 'loss', 'content': 0.09211796522140503, 'timestamp': '2025-09-30 22:32:37.548878', 'step': 16511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:37.579810', 'step': 16511, 'epoch': 3} {'type': 'loss', 'content': 0.07796324044466019, 'timestamp': '2025-09-30 22:32:37.605552', 'step': 16512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.636645', 'step': 16512, 'epoch': 3} {'type': 'loss', 'content': 0.08130618184804916, 'timestamp': '2025-09-30 22:32:37.639382', 'step': 16513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.668826', 'step': 16513, 'epoch': 3} {'type': 'loss', 'content': 0.06075169891119003, 'timestamp': '2025-09-30 22:32:37.671420', 'step': 16514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.714462', 'step': 16514, 'epoch': 3} {'type': 'loss', 'content': 0.025414150208234787, 'timestamp': '2025-09-30 22:32:37.718294', 'step': 16515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:37.749399', 'step': 16515, 'epoch': 3} {'type': 'loss', 'content': 0.06702212244272232, 'timestamp': '2025-09-30 22:32:37.773209', 'step': 16516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.804627', 'step': 16516, 'epoch': 3} {'type': 'loss', 'content': 0.09043051302433014, 'timestamp': '2025-09-30 22:32:37.807001', 'step': 16517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.836177', 'step': 16517, 'epoch': 3} {'type': 'loss', 'content': 0.04629521444439888, 'timestamp': '2025-09-30 22:32:37.838706', 'step': 16518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:37.868511', 'step': 16518, 'epoch': 3} {'type': 'loss', 'content': 0.052224136888980865, 'timestamp': '2025-09-30 22:32:37.872958', 'step': 16519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:37.902883', 'step': 16519, 'epoch': 3} {'type': 'loss', 'content': 0.13017985224723816, 'timestamp': '2025-09-30 22:32:37.926672', 'step': 16520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.958276', 'step': 16520, 'epoch': 3} {'type': 'loss', 'content': 0.04654095321893692, 'timestamp': '2025-09-30 22:32:37.963327', 'step': 16521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:37.999839', 'step': 16521, 'epoch': 3} {'type': 'loss', 'content': 0.07450044900178909, 'timestamp': '2025-09-30 22:32:38.002931', 'step': 16522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:32:38.034160', 'step': 16522, 'epoch': 3} {'type': 'loss', 'content': 0.05750947818160057, 'timestamp': '2025-09-30 22:32:38.038371', 'step': 16523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:38.070777', 'step': 16523, 'epoch': 3} {'type': 'loss', 'content': 0.08272382616996765, 'timestamp': '2025-09-30 22:32:38.094709', 'step': 16524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.124550', 'step': 16524, 'epoch': 3} {'type': 'loss', 'content': 0.014578639529645443, 'timestamp': '2025-09-30 22:32:38.126744', 'step': 16525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:38.157870', 'step': 16525, 'epoch': 3} {'type': 'loss', 'content': 0.022705333307385445, 'timestamp': '2025-09-30 22:32:38.163666', 'step': 16526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:38.195232', 'step': 16526, 'epoch': 3} {'type': 'loss', 'content': 0.023036178201436996, 'timestamp': '2025-09-30 22:32:38.198736', 'step': 16527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:38.229524', 'step': 16527, 'epoch': 3} {'type': 'loss', 'content': 0.06604138761758804, 'timestamp': '2025-09-30 22:32:38.254144', 'step': 16528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:38.288714', 'step': 16528, 'epoch': 3} {'type': 'loss', 'content': 0.04195971414446831, 'timestamp': '2025-09-30 22:32:38.292342', 'step': 16529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:38.322178', 'step': 16529, 'epoch': 3} {'type': 'loss', 'content': 0.0625089630484581, 'timestamp': '2025-09-30 22:32:38.325467', 'step': 16530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:38.358463', 'step': 16530, 'epoch': 3} {'type': 'loss', 'content': 0.09978443384170532, 'timestamp': '2025-09-30 22:32:38.361241', 'step': 16531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:38.391344', 'step': 16531, 'epoch': 3} {'type': 'loss', 'content': 0.08996424823999405, 'timestamp': '2025-09-30 22:32:38.415736', 'step': 16532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.447059', 'step': 16532, 'epoch': 3} {'type': 'loss', 'content': 0.08323036879301071, 'timestamp': '2025-09-30 22:32:38.451815', 'step': 16533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:38.484182', 'step': 16533, 'epoch': 3} {'type': 'loss', 'content': 0.04986443743109703, 'timestamp': '2025-09-30 22:32:38.487374', 'step': 16534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.517816', 'step': 16534, 'epoch': 3} {'type': 'loss', 'content': 0.19061288237571716, 'timestamp': '2025-09-30 22:32:38.522653', 'step': 16535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.558396', 'step': 16535, 'epoch': 3} {'type': 'loss', 'content': 0.0803031325340271, 'timestamp': '2025-09-30 22:32:38.582080', 'step': 16536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.616276', 'step': 16536, 'epoch': 3} {'type': 'loss', 'content': 0.05884991213679314, 'timestamp': '2025-09-30 22:32:38.619546', 'step': 16537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.649593', 'step': 16537, 'epoch': 3} {'type': 'loss', 'content': 0.054901473224163055, 'timestamp': '2025-09-30 22:32:38.656570', 'step': 16538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:38.686559', 'step': 16538, 'epoch': 3} {'type': 'loss', 'content': 0.05333893373608589, 'timestamp': '2025-09-30 22:32:38.691783', 'step': 16539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:38.722201', 'step': 16539, 'epoch': 3} {'type': 'loss', 'content': 0.05444968119263649, 'timestamp': '2025-09-30 22:32:38.746725', 'step': 16540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:38.777046', 'step': 16540, 'epoch': 3} {'type': 'loss', 'content': 0.11831621825695038, 'timestamp': '2025-09-30 22:32:38.779471', 'step': 16541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:38.811321', 'step': 16541, 'epoch': 3} {'type': 'loss', 'content': 0.05576010048389435, 'timestamp': '2025-09-30 22:32:38.814633', 'step': 16542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:38.846503', 'step': 16542, 'epoch': 3} {'type': 'loss', 'content': 0.1182270422577858, 'timestamp': '2025-09-30 22:32:38.849256', 'step': 16543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.879556', 'step': 16543, 'epoch': 3} {'type': 'loss', 'content': 0.08051697909832001, 'timestamp': '2025-09-30 22:32:38.906276', 'step': 16544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.937722', 'step': 16544, 'epoch': 3} {'type': 'loss', 'content': 0.06207166239619255, 'timestamp': '2025-09-30 22:32:38.940710', 'step': 16545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:38.972569', 'step': 16545, 'epoch': 3} {'type': 'loss', 'content': 0.07422605901956558, 'timestamp': '2025-09-30 22:32:38.975927', 'step': 16546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:39.018510', 'step': 16546, 'epoch': 3} {'type': 'loss', 'content': 0.08071035146713257, 'timestamp': '2025-09-30 22:32:39.021184', 'step': 16547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:39.051932', 'step': 16547, 'epoch': 3} {'type': 'loss', 'content': 0.0394553579390049, 'timestamp': '2025-09-30 22:32:39.075708', 'step': 16548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:39.106983', 'step': 16548, 'epoch': 3} {'type': 'loss', 'content': 0.05013303831219673, 'timestamp': '2025-09-30 22:32:39.110276', 'step': 16549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:39.142107', 'step': 16549, 'epoch': 3} {'type': 'loss', 'content': 0.1252124160528183, 'timestamp': '2025-09-30 22:32:39.146788', 'step': 16550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.178333', 'step': 16550, 'epoch': 3} {'type': 'loss', 'content': 0.037505634129047394, 'timestamp': '2025-09-30 22:32:39.180936', 'step': 16551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:39.211816', 'step': 16551, 'epoch': 3} {'type': 'loss', 'content': 0.07929810881614685, 'timestamp': '2025-09-30 22:32:39.236140', 'step': 16552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:39.267803', 'step': 16552, 'epoch': 3} {'type': 'loss', 'content': 0.06458942592144012, 'timestamp': '2025-09-30 22:32:39.283164', 'step': 16553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:39.312793', 'step': 16553, 'epoch': 3} {'type': 'loss', 'content': 0.11180032789707184, 'timestamp': '2025-09-30 22:32:39.315855', 'step': 16554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:39.347647', 'step': 16554, 'epoch': 3} {'type': 'loss', 'content': 0.062155820429325104, 'timestamp': '2025-09-30 22:32:39.350241', 'step': 16555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.380477', 'step': 16555, 'epoch': 3} {'type': 'loss', 'content': 0.03850801661610603, 'timestamp': '2025-09-30 22:32:39.406324', 'step': 16556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.437536', 'step': 16556, 'epoch': 3} {'type': 'loss', 'content': 0.1237235814332962, 'timestamp': '2025-09-30 22:32:39.441481', 'step': 16557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.473034', 'step': 16557, 'epoch': 3} {'type': 'loss', 'content': 0.0785883292555809, 'timestamp': '2025-09-30 22:32:39.487025', 'step': 16558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:39.517883', 'step': 16558, 'epoch': 3} {'type': 'loss', 'content': 0.10782039165496826, 'timestamp': '2025-09-30 22:32:39.522040', 'step': 16559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:39.561277', 'step': 16559, 'epoch': 3} {'type': 'loss', 'content': 0.02121340110898018, 'timestamp': '2025-09-30 22:32:39.585995', 'step': 16560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:39.617264', 'step': 16560, 'epoch': 3} {'type': 'loss', 'content': 0.12323921173810959, 'timestamp': '2025-09-30 22:32:39.624682', 'step': 16561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:39.657556', 'step': 16561, 'epoch': 3} {'type': 'loss', 'content': 0.0664147287607193, 'timestamp': '2025-09-30 22:32:39.659721', 'step': 16562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.689379', 'step': 16562, 'epoch': 3} {'type': 'loss', 'content': 0.027306493371725082, 'timestamp': '2025-09-30 22:32:39.692833', 'step': 16563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:39.724173', 'step': 16563, 'epoch': 3} {'type': 'loss', 'content': 0.12792742252349854, 'timestamp': '2025-09-30 22:32:39.750999', 'step': 16564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:39.780983', 'step': 16564, 'epoch': 3} {'type': 'loss', 'content': 0.0290040522813797, 'timestamp': '2025-09-30 22:32:39.783441', 'step': 16565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.813814', 'step': 16565, 'epoch': 3} {'type': 'loss', 'content': 0.02854095958173275, 'timestamp': '2025-09-30 22:32:39.823415', 'step': 16566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.853121', 'step': 16566, 'epoch': 3} {'type': 'loss', 'content': 0.0511699914932251, 'timestamp': '2025-09-30 22:32:39.857088', 'step': 16567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:39.889804', 'step': 16567, 'epoch': 3} {'type': 'loss', 'content': 0.05440963804721832, 'timestamp': '2025-09-30 22:32:39.927429', 'step': 16568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:39.961779', 'step': 16568, 'epoch': 3} {'type': 'loss', 'content': 0.08732222765684128, 'timestamp': '2025-09-30 22:32:39.964239', 'step': 16569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.003412', 'step': 16569, 'epoch': 3} {'type': 'loss', 'content': 0.13718341290950775, 'timestamp': '2025-09-30 22:32:40.005936', 'step': 16570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.035552', 'step': 16570, 'epoch': 3} {'type': 'loss', 'content': 0.08958441764116287, 'timestamp': '2025-09-30 22:32:40.039029', 'step': 16571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.070387', 'step': 16571, 'epoch': 3} {'type': 'loss', 'content': 0.036516785621643066, 'timestamp': '2025-09-30 22:32:40.094444', 'step': 16572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:40.127684', 'step': 16572, 'epoch': 3} {'type': 'loss', 'content': 0.12157297879457474, 'timestamp': '2025-09-30 22:32:40.131143', 'step': 16573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.161641', 'step': 16573, 'epoch': 3} {'type': 'loss', 'content': 0.02448551543056965, 'timestamp': '2025-09-30 22:32:40.164948', 'step': 16574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.196329', 'step': 16574, 'epoch': 3} {'type': 'loss', 'content': 0.10199946165084839, 'timestamp': '2025-09-30 22:32:40.199988', 'step': 16575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:40.232874', 'step': 16575, 'epoch': 3} {'type': 'loss', 'content': 0.055719345808029175, 'timestamp': '2025-09-30 22:32:40.257068', 'step': 16576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:40.287272', 'step': 16576, 'epoch': 3} {'type': 'loss', 'content': 0.0390663743019104, 'timestamp': '2025-09-30 22:32:40.289569', 'step': 16577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:40.332098', 'step': 16577, 'epoch': 3} {'type': 'loss', 'content': 0.059833332896232605, 'timestamp': '2025-09-30 22:32:40.336075', 'step': 16578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.372655', 'step': 16578, 'epoch': 3} {'type': 'loss', 'content': 0.09545526653528214, 'timestamp': '2025-09-30 22:32:40.377085', 'step': 16579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.410729', 'step': 16579, 'epoch': 3} {'type': 'loss', 'content': 0.11973559856414795, 'timestamp': '2025-09-30 22:32:40.435974', 'step': 16580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:40.467382', 'step': 16580, 'epoch': 3} {'type': 'loss', 'content': 0.02030370756983757, 'timestamp': '2025-09-30 22:32:40.471653', 'step': 16581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:40.503404', 'step': 16581, 'epoch': 3} {'type': 'loss', 'content': 0.06260839849710464, 'timestamp': '2025-09-30 22:32:40.505924', 'step': 16582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.535980', 'step': 16582, 'epoch': 3} {'type': 'loss', 'content': 0.05914460867643356, 'timestamp': '2025-09-30 22:32:40.538048', 'step': 16583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:40.574338', 'step': 16583, 'epoch': 3} {'type': 'loss', 'content': 0.047005392611026764, 'timestamp': '2025-09-30 22:32:40.599192', 'step': 16584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:40.635455', 'step': 16584, 'epoch': 3} {'type': 'loss', 'content': 0.05962555110454559, 'timestamp': '2025-09-30 22:32:40.637992', 'step': 16585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.667865', 'step': 16585, 'epoch': 3} {'type': 'loss', 'content': 0.05812397226691246, 'timestamp': '2025-09-30 22:32:40.670597', 'step': 16586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:40.701845', 'step': 16586, 'epoch': 3} {'type': 'loss', 'content': 0.07205807417631149, 'timestamp': '2025-09-30 22:32:40.721973', 'step': 16587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:40.752469', 'step': 16587, 'epoch': 3} {'type': 'loss', 'content': 0.09280470013618469, 'timestamp': '2025-09-30 22:32:40.777009', 'step': 16588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:40.806744', 'step': 16588, 'epoch': 3} {'type': 'loss', 'content': 0.03682887926697731, 'timestamp': '2025-09-30 22:32:40.820998', 'step': 16589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:40.853085', 'step': 16589, 'epoch': 3} {'type': 'loss', 'content': 0.06432788074016571, 'timestamp': '2025-09-30 22:32:40.855993', 'step': 16590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:40.886437', 'step': 16590, 'epoch': 3} {'type': 'loss', 'content': 0.09655187278985977, 'timestamp': '2025-09-30 22:32:40.888744', 'step': 16591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:40.919491', 'step': 16591, 'epoch': 3} {'type': 'loss', 'content': 0.08424826711416245, 'timestamp': '2025-09-30 22:32:40.945774', 'step': 16592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:40.975722', 'step': 16592, 'epoch': 3} {'type': 'loss', 'content': 0.06541422754526138, 'timestamp': '2025-09-30 22:32:40.977856', 'step': 16593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:41.008601', 'step': 16593, 'epoch': 3} {'type': 'loss', 'content': 0.07645624130964279, 'timestamp': '2025-09-30 22:32:41.011326', 'step': 16594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:41.041673', 'step': 16594, 'epoch': 3} {'type': 'loss', 'content': 0.07508827745914459, 'timestamp': '2025-09-30 22:32:41.047136', 'step': 16595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.077930', 'step': 16595, 'epoch': 3} {'type': 'loss', 'content': 0.06528665870428085, 'timestamp': '2025-09-30 22:32:41.102207', 'step': 16596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.149185', 'step': 16596, 'epoch': 3} {'type': 'loss', 'content': 0.03152301162481308, 'timestamp': '2025-09-30 22:32:41.151653', 'step': 16597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.182882', 'step': 16597, 'epoch': 3} {'type': 'loss', 'content': 0.08071012794971466, 'timestamp': '2025-09-30 22:32:41.185255', 'step': 16598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:32:41.215649', 'step': 16598, 'epoch': 3} {'type': 'loss', 'content': 0.06013660877943039, 'timestamp': '2025-09-30 22:32:41.222811', 'step': 16599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.254494', 'step': 16599, 'epoch': 3} {'type': 'loss', 'content': 0.0748712345957756, 'timestamp': '2025-09-30 22:32:41.278836', 'step': 16600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:41.313491', 'step': 16600, 'epoch': 3} {'type': 'loss', 'content': 0.06623421609401703, 'timestamp': '2025-09-30 22:32:41.326622', 'step': 16601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:41.356887', 'step': 16601, 'epoch': 3} {'type': 'loss', 'content': 0.051327358931303024, 'timestamp': '2025-09-30 22:32:41.360401', 'step': 16602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:41.391720', 'step': 16602, 'epoch': 3} {'type': 'loss', 'content': 0.019819553941488266, 'timestamp': '2025-09-30 22:32:41.394075', 'step': 16603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.438880', 'step': 16603, 'epoch': 3} {'type': 'loss', 'content': 0.09277283400297165, 'timestamp': '2025-09-30 22:32:41.462840', 'step': 16604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.493650', 'step': 16604, 'epoch': 3} {'type': 'loss', 'content': 0.09493664652109146, 'timestamp': '2025-09-30 22:32:41.498128', 'step': 16605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.529476', 'step': 16605, 'epoch': 3} {'type': 'loss', 'content': 0.03365189954638481, 'timestamp': '2025-09-30 22:32:41.532123', 'step': 16606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:41.563153', 'step': 16606, 'epoch': 3} {'type': 'loss', 'content': 0.1317072957754135, 'timestamp': '2025-09-30 22:32:41.565745', 'step': 16607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:41.603935', 'step': 16607, 'epoch': 3} {'type': 'loss', 'content': 0.011494342237710953, 'timestamp': '2025-09-30 22:32:41.629293', 'step': 16608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:41.660660', 'step': 16608, 'epoch': 3} {'type': 'loss', 'content': 0.04324256256222725, 'timestamp': '2025-09-30 22:32:41.663600', 'step': 16609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:41.693796', 'step': 16609, 'epoch': 3} {'type': 'loss', 'content': 0.08129003643989563, 'timestamp': '2025-09-30 22:32:41.696827', 'step': 16610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:41.731403', 'step': 16610, 'epoch': 3} {'type': 'loss', 'content': 0.0837392657995224, 'timestamp': '2025-09-30 22:32:41.734918', 'step': 16611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:41.764928', 'step': 16611, 'epoch': 3} {'type': 'loss', 'content': 0.030362244695425034, 'timestamp': '2025-09-30 22:32:41.789586', 'step': 16612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:41.823144', 'step': 16612, 'epoch': 3} {'type': 'loss', 'content': 0.12590353190898895, 'timestamp': '2025-09-30 22:32:41.826455', 'step': 16613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:41.857651', 'step': 16613, 'epoch': 3} {'type': 'loss', 'content': 0.07488910108804703, 'timestamp': '2025-09-30 22:32:41.860571', 'step': 16614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:41.891879', 'step': 16614, 'epoch': 3} {'type': 'loss', 'content': 0.058043405413627625, 'timestamp': '2025-09-30 22:32:41.895906', 'step': 16615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:41.927597', 'step': 16615, 'epoch': 3} {'type': 'loss', 'content': 0.029082201421260834, 'timestamp': '2025-09-30 22:32:41.951888', 'step': 16616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:41.982415', 'step': 16616, 'epoch': 3} {'type': 'loss', 'content': 0.04083003103733063, 'timestamp': '2025-09-30 22:32:41.985284', 'step': 16617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:42.015427', 'step': 16617, 'epoch': 3} {'type': 'loss', 'content': 0.04021931067109108, 'timestamp': '2025-09-30 22:32:42.018388', 'step': 16618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:42.048877', 'step': 16618, 'epoch': 3} {'type': 'loss', 'content': 0.034471943974494934, 'timestamp': '2025-09-30 22:32:42.052186', 'step': 16619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:42.086271', 'step': 16619, 'epoch': 3} {'type': 'loss', 'content': 0.14274904131889343, 'timestamp': '2025-09-30 22:32:42.112764', 'step': 16620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:42.145168', 'step': 16620, 'epoch': 3} {'type': 'loss', 'content': 0.0508267804980278, 'timestamp': '2025-09-30 22:32:42.159237', 'step': 16621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:42.190425', 'step': 16621, 'epoch': 3} {'type': 'loss', 'content': 0.028678758069872856, 'timestamp': '2025-09-30 22:32:42.208655', 'step': 16622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:42.240501', 'step': 16622, 'epoch': 3} {'type': 'loss', 'content': 0.030536536127328873, 'timestamp': '2025-09-30 22:32:42.246629', 'step': 16623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:42.280651', 'step': 16623, 'epoch': 3} {'type': 'loss', 'content': 0.028948917984962463, 'timestamp': '2025-09-30 22:32:42.307618', 'step': 16624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:42.345945', 'step': 16624, 'epoch': 3} {'type': 'loss', 'content': 0.14650073647499084, 'timestamp': '2025-09-30 22:32:42.363450', 'step': 16625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:42.397174', 'step': 16625, 'epoch': 3} {'type': 'loss', 'content': 0.0819888636469841, 'timestamp': '2025-09-30 22:32:42.402877', 'step': 16626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:42.434985', 'step': 16626, 'epoch': 3} {'type': 'loss', 'content': 0.02835421822965145, 'timestamp': '2025-09-30 22:32:42.439380', 'step': 16627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:42.484451', 'step': 16627, 'epoch': 3} {'type': 'loss', 'content': 0.04761350527405739, 'timestamp': '2025-09-30 22:32:42.511044', 'step': 16628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:42.556228', 'step': 16628, 'epoch': 3} {'type': 'loss', 'content': 0.05697663873434067, 'timestamp': '2025-09-30 22:32:42.560171', 'step': 16629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:42.592966', 'step': 16629, 'epoch': 3} {'type': 'loss', 'content': 0.06102195754647255, 'timestamp': '2025-09-30 22:32:42.597772', 'step': 16630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:32:42.630615', 'step': 16630, 'epoch': 3} {'type': 'loss', 'content': 0.09698367118835449, 'timestamp': '2025-09-30 22:32:42.634865', 'step': 16631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:42.668295', 'step': 16631, 'epoch': 3} {'type': 'loss', 'content': 0.04850800707936287, 'timestamp': '2025-09-30 22:32:42.694374', 'step': 16632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:42.727200', 'step': 16632, 'epoch': 3} {'type': 'loss', 'content': 0.007498599123209715, 'timestamp': '2025-09-30 22:32:42.731114', 'step': 16633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:42.777207', 'step': 16633, 'epoch': 3} {'type': 'loss', 'content': 0.1293274313211441, 'timestamp': '2025-09-30 22:32:42.782567', 'step': 16634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:42.815000', 'step': 16634, 'epoch': 3} {'type': 'loss', 'content': 0.12721873819828033, 'timestamp': '2025-09-30 22:32:42.819064', 'step': 16635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:42.851156', 'step': 16635, 'epoch': 3} {'type': 'loss', 'content': 0.022659817710518837, 'timestamp': '2025-09-30 22:32:42.875739', 'step': 16636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:42.922006', 'step': 16636, 'epoch': 3} {'type': 'loss', 'content': 0.028559193015098572, 'timestamp': '2025-09-30 22:32:42.925730', 'step': 16637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:42.958707', 'step': 16637, 'epoch': 3} {'type': 'loss', 'content': 0.07725878804922104, 'timestamp': '2025-09-30 22:32:42.964354', 'step': 16638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:42.996265', 'step': 16638, 'epoch': 3} {'type': 'loss', 'content': 0.17087675631046295, 'timestamp': '2025-09-30 22:32:43.001982', 'step': 16639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:43.034406', 'step': 16639, 'epoch': 3} {'type': 'loss', 'content': 0.03152484819293022, 'timestamp': '2025-09-30 22:32:43.074277', 'step': 16640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:43.118456', 'step': 16640, 'epoch': 3} {'type': 'loss', 'content': 0.022437281906604767, 'timestamp': '2025-09-30 22:32:43.122054', 'step': 16641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:43.153768', 'step': 16641, 'epoch': 3} {'type': 'loss', 'content': 0.0524575375020504, 'timestamp': '2025-09-30 22:32:43.156358', 'step': 16642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:43.187747', 'step': 16642, 'epoch': 3} {'type': 'loss', 'content': 0.1540427803993225, 'timestamp': '2025-09-30 22:32:43.190134', 'step': 16643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:43.221291', 'step': 16643, 'epoch': 3} {'type': 'loss', 'content': 0.05772465094923973, 'timestamp': '2025-09-30 22:32:43.245409', 'step': 16644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:43.276778', 'step': 16644, 'epoch': 3} {'type': 'loss', 'content': 0.03335285186767578, 'timestamp': '2025-09-30 22:32:43.283619', 'step': 16645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:43.314520', 'step': 16645, 'epoch': 3} {'type': 'loss', 'content': 0.09534002095460892, 'timestamp': '2025-09-30 22:32:43.318318', 'step': 16646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:43.349023', 'step': 16646, 'epoch': 3} {'type': 'loss', 'content': 0.09078219532966614, 'timestamp': '2025-09-30 22:32:43.353153', 'step': 16647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:43.385046', 'step': 16647, 'epoch': 3} {'type': 'loss', 'content': 0.03753558546304703, 'timestamp': '2025-09-30 22:32:43.410593', 'step': 16648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:43.444719', 'step': 16648, 'epoch': 3} {'type': 'loss', 'content': 0.09810155630111694, 'timestamp': '2025-09-30 22:32:43.448233', 'step': 16649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.480699', 'step': 16649, 'epoch': 3} {'type': 'loss', 'content': 0.04281968995928764, 'timestamp': '2025-09-30 22:32:43.485610', 'step': 16650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.518048', 'step': 16650, 'epoch': 3} {'type': 'loss', 'content': 0.04601578786969185, 'timestamp': '2025-09-30 22:32:43.521606', 'step': 16651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.563177', 'step': 16651, 'epoch': 3} {'type': 'loss', 'content': 0.08318605273962021, 'timestamp': '2025-09-30 22:32:43.589023', 'step': 16652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:43.622153', 'step': 16652, 'epoch': 3} {'type': 'loss', 'content': 0.08916588872671127, 'timestamp': '2025-09-30 22:32:43.626296', 'step': 16653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:43.657446', 'step': 16653, 'epoch': 3} {'type': 'loss', 'content': 0.08162149041891098, 'timestamp': '2025-09-30 22:32:43.660870', 'step': 16654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.693262', 'step': 16654, 'epoch': 3} {'type': 'loss', 'content': 0.08194014430046082, 'timestamp': '2025-09-30 22:32:43.698703', 'step': 16655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.743702', 'step': 16655, 'epoch': 3} {'type': 'loss', 'content': 0.08389800041913986, 'timestamp': '2025-09-30 22:32:43.772945', 'step': 16656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:43.804123', 'step': 16656, 'epoch': 3} {'type': 'loss', 'content': 0.031550195068120956, 'timestamp': '2025-09-30 22:32:43.809105', 'step': 16657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:43.841188', 'step': 16657, 'epoch': 3} {'type': 'loss', 'content': 0.08505335450172424, 'timestamp': '2025-09-30 22:32:43.843616', 'step': 16658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.875510', 'step': 16658, 'epoch': 3} {'type': 'loss', 'content': 0.13141950964927673, 'timestamp': '2025-09-30 22:32:43.879077', 'step': 16659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:43.909646', 'step': 16659, 'epoch': 3} {'type': 'loss', 'content': 0.14666908979415894, 'timestamp': '2025-09-30 22:32:43.934059', 'step': 16660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:43.966117', 'step': 16660, 'epoch': 3} {'type': 'loss', 'content': 0.052106428891420364, 'timestamp': '2025-09-30 22:32:43.970407', 'step': 16661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:44.005355', 'step': 16661, 'epoch': 3} {'type': 'loss', 'content': 0.03079453483223915, 'timestamp': '2025-09-30 22:32:44.008641', 'step': 16662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.039368', 'step': 16662, 'epoch': 3} {'type': 'loss', 'content': 0.06548859924077988, 'timestamp': '2025-09-30 22:32:44.042214', 'step': 16663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.075716', 'step': 16663, 'epoch': 3} {'type': 'loss', 'content': 0.039566729217767715, 'timestamp': '2025-09-30 22:32:44.100138', 'step': 16664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:44.132409', 'step': 16664, 'epoch': 3} {'type': 'loss', 'content': 0.1448124647140503, 'timestamp': '2025-09-30 22:32:44.136120', 'step': 16665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:44.168604', 'step': 16665, 'epoch': 3} {'type': 'loss', 'content': 0.0931347906589508, 'timestamp': '2025-09-30 22:32:44.171466', 'step': 16666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.213435', 'step': 16666, 'epoch': 3} {'type': 'loss', 'content': 0.11805298924446106, 'timestamp': '2025-09-30 22:32:44.219282', 'step': 16667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.264173', 'step': 16667, 'epoch': 3} {'type': 'loss', 'content': 0.05539270490407944, 'timestamp': '2025-09-30 22:32:44.288412', 'step': 16668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.318414', 'step': 16668, 'epoch': 3} {'type': 'loss', 'content': 0.04239724949002266, 'timestamp': '2025-09-30 22:32:44.321239', 'step': 16669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.353001', 'step': 16669, 'epoch': 3} {'type': 'loss', 'content': 0.08673322200775146, 'timestamp': '2025-09-30 22:32:44.357101', 'step': 16670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.390289', 'step': 16670, 'epoch': 3} {'type': 'loss', 'content': 0.10174556076526642, 'timestamp': '2025-09-30 22:32:44.397676', 'step': 16671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.432132', 'step': 16671, 'epoch': 3} {'type': 'loss', 'content': 0.07215137034654617, 'timestamp': '2025-09-30 22:32:44.456793', 'step': 16672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.488883', 'step': 16672, 'epoch': 3} {'type': 'loss', 'content': 0.07901428639888763, 'timestamp': '2025-09-30 22:32:44.505532', 'step': 16673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.543327', 'step': 16673, 'epoch': 3} {'type': 'loss', 'content': 0.08127826452255249, 'timestamp': '2025-09-30 22:32:44.553794', 'step': 16674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.585507', 'step': 16674, 'epoch': 3} {'type': 'loss', 'content': 0.021632222458720207, 'timestamp': '2025-09-30 22:32:44.589029', 'step': 16675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.619147', 'step': 16675, 'epoch': 3} {'type': 'loss', 'content': 0.04298844560980797, 'timestamp': '2025-09-30 22:32:44.644389', 'step': 16676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:44.676875', 'step': 16676, 'epoch': 3} {'type': 'loss', 'content': 0.04154844954609871, 'timestamp': '2025-09-30 22:32:44.681426', 'step': 16677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.711629', 'step': 16677, 'epoch': 3} {'type': 'loss', 'content': 0.0446377769112587, 'timestamp': '2025-09-30 22:32:44.717856', 'step': 16678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.748572', 'step': 16678, 'epoch': 3} {'type': 'loss', 'content': 0.02680918015539646, 'timestamp': '2025-09-30 22:32:44.753355', 'step': 16679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:44.784730', 'step': 16679, 'epoch': 3} {'type': 'loss', 'content': 0.06465008109807968, 'timestamp': '2025-09-30 22:32:44.811546', 'step': 16680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.845412', 'step': 16680, 'epoch': 3} {'type': 'loss', 'content': 0.08450879156589508, 'timestamp': '2025-09-30 22:32:44.847890', 'step': 16681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:44.883839', 'step': 16681, 'epoch': 3} {'type': 'loss', 'content': 0.05388014763593674, 'timestamp': '2025-09-30 22:32:44.886920', 'step': 16682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.918676', 'step': 16682, 'epoch': 3} {'type': 'loss', 'content': 0.08063001185655594, 'timestamp': '2025-09-30 22:32:44.921050', 'step': 16683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:44.954582', 'step': 16683, 'epoch': 3} {'type': 'loss', 'content': 0.134281188249588, 'timestamp': '2025-09-30 22:32:44.983498', 'step': 16684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:45.013870', 'step': 16684, 'epoch': 3} {'type': 'loss', 'content': 0.08562564849853516, 'timestamp': '2025-09-30 22:32:45.017659', 'step': 16685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:45.064398', 'step': 16685, 'epoch': 3} {'type': 'loss', 'content': 0.06080537289381027, 'timestamp': '2025-09-30 22:32:45.069567', 'step': 16686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.105116', 'step': 16686, 'epoch': 3} {'type': 'loss', 'content': 0.07049520313739777, 'timestamp': '2025-09-30 22:32:45.108849', 'step': 16687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:45.142902', 'step': 16687, 'epoch': 3} {'type': 'loss', 'content': 0.072694793343544, 'timestamp': '2025-09-30 22:32:45.168093', 'step': 16688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.199227', 'step': 16688, 'epoch': 3} {'type': 'loss', 'content': 0.04964400455355644, 'timestamp': '2025-09-30 22:32:45.204037', 'step': 16689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:45.235516', 'step': 16689, 'epoch': 3} {'type': 'loss', 'content': 0.045346200466156006, 'timestamp': '2025-09-30 22:32:45.240340', 'step': 16690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:45.272591', 'step': 16690, 'epoch': 3} {'type': 'loss', 'content': 0.05294191092252731, 'timestamp': '2025-09-30 22:32:45.275739', 'step': 16691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:45.313823', 'step': 16691, 'epoch': 3} {'type': 'loss', 'content': 0.0553041473031044, 'timestamp': '2025-09-30 22:32:45.338839', 'step': 16692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.374747', 'step': 16692, 'epoch': 3} {'type': 'loss', 'content': 0.16741707921028137, 'timestamp': '2025-09-30 22:32:45.377018', 'step': 16693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:45.406604', 'step': 16693, 'epoch': 3} {'type': 'loss', 'content': 0.1185569167137146, 'timestamp': '2025-09-30 22:32:45.410905', 'step': 16694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:45.451836', 'step': 16694, 'epoch': 3} {'type': 'loss', 'content': 0.11274608969688416, 'timestamp': '2025-09-30 22:32:45.456659', 'step': 16695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.496001', 'step': 16695, 'epoch': 3} {'type': 'loss', 'content': 0.1391734778881073, 'timestamp': '2025-09-30 22:32:45.521664', 'step': 16696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.561105', 'step': 16696, 'epoch': 3} {'type': 'loss', 'content': 0.09423698484897614, 'timestamp': '2025-09-30 22:32:45.565813', 'step': 16697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.597045', 'step': 16697, 'epoch': 3} {'type': 'loss', 'content': 0.10143992304801941, 'timestamp': '2025-09-30 22:32:45.600584', 'step': 16698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.632316', 'step': 16698, 'epoch': 3} {'type': 'loss', 'content': 0.05279092863202095, 'timestamp': '2025-09-30 22:32:45.634647', 'step': 16699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:45.674714', 'step': 16699, 'epoch': 3} {'type': 'loss', 'content': 0.06971914321184158, 'timestamp': '2025-09-30 22:32:45.699579', 'step': 16700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:45.734305', 'step': 16700, 'epoch': 3} {'type': 'loss', 'content': 0.07866697013378143, 'timestamp': '2025-09-30 22:32:45.737728', 'step': 16701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.769844', 'step': 16701, 'epoch': 3} {'type': 'loss', 'content': 0.06857287883758545, 'timestamp': '2025-09-30 22:32:45.773432', 'step': 16702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:45.805159', 'step': 16702, 'epoch': 3} {'type': 'loss', 'content': 0.02619657665491104, 'timestamp': '2025-09-30 22:32:45.809379', 'step': 16703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:45.847859', 'step': 16703, 'epoch': 3} {'type': 'loss', 'content': 0.06697864085435867, 'timestamp': '2025-09-30 22:32:45.873161', 'step': 16704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:45.904317', 'step': 16704, 'epoch': 3} {'type': 'loss', 'content': 0.08693844825029373, 'timestamp': '2025-09-30 22:32:45.907532', 'step': 16705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:45.947361', 'step': 16705, 'epoch': 3} {'type': 'loss', 'content': 0.019564591348171234, 'timestamp': '2025-09-30 22:32:45.950710', 'step': 16706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:45.983001', 'step': 16706, 'epoch': 3} {'type': 'loss', 'content': 0.056385014206171036, 'timestamp': '2025-09-30 22:32:45.985989', 'step': 16707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:46.017811', 'step': 16707, 'epoch': 3} {'type': 'loss', 'content': 0.05609022080898285, 'timestamp': '2025-09-30 22:32:46.042088', 'step': 16708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.089778', 'step': 16708, 'epoch': 3} {'type': 'loss', 'content': 0.04915246739983559, 'timestamp': '2025-09-30 22:32:46.095564', 'step': 16709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:46.130932', 'step': 16709, 'epoch': 3} {'type': 'loss', 'content': 0.09485858678817749, 'timestamp': '2025-09-30 22:32:46.134688', 'step': 16710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:46.166979', 'step': 16710, 'epoch': 3} {'type': 'loss', 'content': 0.07395777106285095, 'timestamp': '2025-09-30 22:32:46.170281', 'step': 16711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.206621', 'step': 16711, 'epoch': 3} {'type': 'loss', 'content': 0.08949385583400726, 'timestamp': '2025-09-30 22:32:46.231773', 'step': 16712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.264887', 'step': 16712, 'epoch': 3} {'type': 'loss', 'content': 0.05686177313327789, 'timestamp': '2025-09-30 22:32:46.277098', 'step': 16713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.307900', 'step': 16713, 'epoch': 3} {'type': 'loss', 'content': 0.0862685963511467, 'timestamp': '2025-09-30 22:32:46.311052', 'step': 16714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:46.344322', 'step': 16714, 'epoch': 3} {'type': 'loss', 'content': 0.053370989859104156, 'timestamp': '2025-09-30 22:32:46.347117', 'step': 16715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.384079', 'step': 16715, 'epoch': 3} {'type': 'loss', 'content': 0.11813563108444214, 'timestamp': '2025-09-30 22:32:46.408455', 'step': 16716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.447483', 'step': 16716, 'epoch': 3} {'type': 'loss', 'content': 0.19808411598205566, 'timestamp': '2025-09-30 22:32:46.451609', 'step': 16717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:46.490725', 'step': 16717, 'epoch': 3} {'type': 'loss', 'content': 0.13724660873413086, 'timestamp': '2025-09-30 22:32:46.494123', 'step': 16718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.525534', 'step': 16718, 'epoch': 3} {'type': 'loss', 'content': 0.10604006052017212, 'timestamp': '2025-09-30 22:32:46.530057', 'step': 16719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:46.562579', 'step': 16719, 'epoch': 3} {'type': 'loss', 'content': 0.0617944672703743, 'timestamp': '2025-09-30 22:32:46.588085', 'step': 16720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:46.619324', 'step': 16720, 'epoch': 3} {'type': 'loss', 'content': 0.07570941001176834, 'timestamp': '2025-09-30 22:32:46.628645', 'step': 16721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:46.660128', 'step': 16721, 'epoch': 3} {'type': 'loss', 'content': 0.07425356656312943, 'timestamp': '2025-09-30 22:32:46.664666', 'step': 16722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:46.697389', 'step': 16722, 'epoch': 3} {'type': 'loss', 'content': 0.08467046916484833, 'timestamp': '2025-09-30 22:32:46.700512', 'step': 16723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:46.731824', 'step': 16723, 'epoch': 3} {'type': 'loss', 'content': 0.1264113485813141, 'timestamp': '2025-09-30 22:32:46.756850', 'step': 16724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:46.788783', 'step': 16724, 'epoch': 3} {'type': 'loss', 'content': 0.11194606125354767, 'timestamp': '2025-09-30 22:32:46.792995', 'step': 16725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:46.824053', 'step': 16725, 'epoch': 3} {'type': 'loss', 'content': 0.1280282586812973, 'timestamp': '2025-09-30 22:32:46.828747', 'step': 16726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:46.861775', 'step': 16726, 'epoch': 3} {'type': 'loss', 'content': 0.09831371903419495, 'timestamp': '2025-09-30 22:32:46.865774', 'step': 16727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:46.898945', 'step': 16727, 'epoch': 3} {'type': 'loss', 'content': 0.058474667370319366, 'timestamp': '2025-09-30 22:32:46.925602', 'step': 16728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:46.956951', 'step': 16728, 'epoch': 3} {'type': 'loss', 'content': 0.056019533425569534, 'timestamp': '2025-09-30 22:32:46.959183', 'step': 16729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:32:46.990936', 'step': 16729, 'epoch': 3} {'type': 'loss', 'content': 0.1277635097503662, 'timestamp': '2025-09-30 22:32:47.000727', 'step': 16730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:47.031625', 'step': 16730, 'epoch': 3} {'type': 'loss', 'content': 0.027122702449560165, 'timestamp': '2025-09-30 22:32:47.043495', 'step': 16731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.075316', 'step': 16731, 'epoch': 3} {'type': 'loss', 'content': 0.06810850650072098, 'timestamp': '2025-09-30 22:32:47.099114', 'step': 16732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:47.131683', 'step': 16732, 'epoch': 3} {'type': 'loss', 'content': 0.062068358063697815, 'timestamp': '2025-09-30 22:32:47.136078', 'step': 16733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:32:47.166628', 'step': 16733, 'epoch': 3} {'type': 'loss', 'content': 0.04730544984340668, 'timestamp': '2025-09-30 22:32:47.171293', 'step': 16734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.203048', 'step': 16734, 'epoch': 3} {'type': 'loss', 'content': 0.03199096396565437, 'timestamp': '2025-09-30 22:32:47.206602', 'step': 16735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.237390', 'step': 16735, 'epoch': 3} {'type': 'loss', 'content': 0.07603557407855988, 'timestamp': '2025-09-30 22:32:47.262177', 'step': 16736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.299450', 'step': 16736, 'epoch': 3} {'type': 'loss', 'content': 0.08531833440065384, 'timestamp': '2025-09-30 22:32:47.302998', 'step': 16737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.342301', 'step': 16737, 'epoch': 3} {'type': 'loss', 'content': 0.04119015112519264, 'timestamp': '2025-09-30 22:32:47.345578', 'step': 16738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.376046', 'step': 16738, 'epoch': 3} {'type': 'loss', 'content': 0.09733781218528748, 'timestamp': '2025-09-30 22:32:47.379012', 'step': 16739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.410791', 'step': 16739, 'epoch': 3} {'type': 'loss', 'content': 0.11879144608974457, 'timestamp': '2025-09-30 22:32:47.434587', 'step': 16740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.468245', 'step': 16740, 'epoch': 3} {'type': 'loss', 'content': 0.02704620361328125, 'timestamp': '2025-09-30 22:32:47.484025', 'step': 16741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.516553', 'step': 16741, 'epoch': 3} {'type': 'loss', 'content': 0.09292103350162506, 'timestamp': '2025-09-30 22:32:47.519512', 'step': 16742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:47.556781', 'step': 16742, 'epoch': 3} {'type': 'loss', 'content': 0.0680989995598793, 'timestamp': '2025-09-30 22:32:47.560530', 'step': 16743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:47.592793', 'step': 16743, 'epoch': 3} {'type': 'loss', 'content': 0.09030312299728394, 'timestamp': '2025-09-30 22:32:47.617206', 'step': 16744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:47.649189', 'step': 16744, 'epoch': 3} {'type': 'loss', 'content': 0.0832274779677391, 'timestamp': '2025-09-30 22:32:47.652928', 'step': 16745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.691029', 'step': 16745, 'epoch': 3} {'type': 'loss', 'content': 0.07505317777395248, 'timestamp': '2025-09-30 22:32:47.694042', 'step': 16746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.728330', 'step': 16746, 'epoch': 3} {'type': 'loss', 'content': 0.11997576057910919, 'timestamp': '2025-09-30 22:32:47.743787', 'step': 16747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.777055', 'step': 16747, 'epoch': 3} {'type': 'loss', 'content': 0.03481040149927139, 'timestamp': '2025-09-30 22:32:47.801241', 'step': 16748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.841030', 'step': 16748, 'epoch': 3} {'type': 'loss', 'content': 0.06386274099349976, 'timestamp': '2025-09-30 22:32:47.843348', 'step': 16749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.876097', 'step': 16749, 'epoch': 3} {'type': 'loss', 'content': 0.024468692019581795, 'timestamp': '2025-09-30 22:32:47.890999', 'step': 16750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:47.923064', 'step': 16750, 'epoch': 3} {'type': 'loss', 'content': 0.09084003418684006, 'timestamp': '2025-09-30 22:32:47.926558', 'step': 16751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:47.959303', 'step': 16751, 'epoch': 3} {'type': 'loss', 'content': 0.1267862468957901, 'timestamp': '2025-09-30 22:32:47.983196', 'step': 16752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:48.014008', 'step': 16752, 'epoch': 3} {'type': 'loss', 'content': 0.08421305567026138, 'timestamp': '2025-09-30 22:32:48.019866', 'step': 16753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:48.055432', 'step': 16753, 'epoch': 3} {'type': 'loss', 'content': 0.07522063702344894, 'timestamp': '2025-09-30 22:32:48.059770', 'step': 16754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:48.093394', 'step': 16754, 'epoch': 3} {'type': 'loss', 'content': 0.055210720747709274, 'timestamp': '2025-09-30 22:32:48.095595', 'step': 16755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.128101', 'step': 16755, 'epoch': 3} {'type': 'loss', 'content': 0.10446072369813919, 'timestamp': '2025-09-30 22:32:48.152040', 'step': 16756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:48.189570', 'step': 16756, 'epoch': 3} {'type': 'loss', 'content': 0.018988149240612984, 'timestamp': '2025-09-30 22:32:48.191816', 'step': 16757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:48.236858', 'step': 16757, 'epoch': 3} {'type': 'loss', 'content': 0.11759097129106522, 'timestamp': '2025-09-30 22:32:48.240524', 'step': 16758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.273881', 'step': 16758, 'epoch': 3} {'type': 'loss', 'content': 0.14846384525299072, 'timestamp': '2025-09-30 22:32:48.278538', 'step': 16759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.308562', 'step': 16759, 'epoch': 3} {'type': 'loss', 'content': 0.13919459283351898, 'timestamp': '2025-09-30 22:32:48.334046', 'step': 16760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.368197', 'step': 16760, 'epoch': 3} {'type': 'loss', 'content': 0.02377133071422577, 'timestamp': '2025-09-30 22:32:48.370762', 'step': 16761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.407668', 'step': 16761, 'epoch': 3} {'type': 'loss', 'content': 0.04344191029667854, 'timestamp': '2025-09-30 22:32:48.410768', 'step': 16762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:48.450448', 'step': 16762, 'epoch': 3} {'type': 'loss', 'content': 0.05876816436648369, 'timestamp': '2025-09-30 22:32:48.452584', 'step': 16763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.491630', 'step': 16763, 'epoch': 3} {'type': 'loss', 'content': 0.05101136863231659, 'timestamp': '2025-09-30 22:32:48.515940', 'step': 16764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.550163', 'step': 16764, 'epoch': 3} {'type': 'loss', 'content': 0.11008056253194809, 'timestamp': '2025-09-30 22:32:48.554342', 'step': 16765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.584638', 'step': 16765, 'epoch': 3} {'type': 'loss', 'content': 0.09304959326982498, 'timestamp': '2025-09-30 22:32:48.594451', 'step': 16766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:48.625932', 'step': 16766, 'epoch': 3} {'type': 'loss', 'content': 0.05244564265012741, 'timestamp': '2025-09-30 22:32:48.628696', 'step': 16767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.660184', 'step': 16767, 'epoch': 3} {'type': 'loss', 'content': 0.0771864578127861, 'timestamp': '2025-09-30 22:32:48.684925', 'step': 16768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.722328', 'step': 16768, 'epoch': 3} {'type': 'loss', 'content': 0.08905888348817825, 'timestamp': '2025-09-30 22:32:48.724587', 'step': 16769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.755880', 'step': 16769, 'epoch': 3} {'type': 'loss', 'content': 0.06240645796060562, 'timestamp': '2025-09-30 22:32:48.757977', 'step': 16770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.792118', 'step': 16770, 'epoch': 3} {'type': 'loss', 'content': 0.03915931284427643, 'timestamp': '2025-09-30 22:32:48.796261', 'step': 16771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.827563', 'step': 16771, 'epoch': 3} {'type': 'loss', 'content': 0.09699183702468872, 'timestamp': '2025-09-30 22:32:48.852271', 'step': 16772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.882488', 'step': 16772, 'epoch': 3} {'type': 'loss', 'content': 0.08728434145450592, 'timestamp': '2025-09-30 22:32:48.888713', 'step': 16773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:48.936528', 'step': 16773, 'epoch': 3} {'type': 'loss', 'content': 0.05056694522500038, 'timestamp': '2025-09-30 22:32:48.953832', 'step': 16774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:48.988858', 'step': 16774, 'epoch': 3} {'type': 'loss', 'content': 0.05813279747962952, 'timestamp': '2025-09-30 22:32:48.999514', 'step': 16775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:49.037883', 'step': 16775, 'epoch': 3} {'type': 'loss', 'content': 0.06090250611305237, 'timestamp': '2025-09-30 22:32:49.061480', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:32:56.748789', 'step': 16776, 'epoch': 3} {'type': 'pplx', 'content': 11204.19285209931, 'timestamp': '2025-09-30 22:32:56.754910', 'step': 16776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:56.785919', 'step': 16776, 'epoch': 3} {'type': 'loss', 'content': 0.059619124978780746, 'timestamp': '2025-09-30 22:32:56.788793', 'step': 16777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:56.820091', 'step': 16777, 'epoch': 3} {'type': 'loss', 'content': 0.038073014467954636, 'timestamp': '2025-09-30 22:32:56.822064', 'step': 16778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:56.853811', 'step': 16778, 'epoch': 3} {'type': 'loss', 'content': 0.05721166357398033, 'timestamp': '2025-09-30 22:32:56.856970', 'step': 16779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:56.888410', 'step': 16779, 'epoch': 3} {'type': 'loss', 'content': 0.08473042398691177, 'timestamp': '2025-09-30 22:32:56.912500', 'step': 16780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:56.946240', 'step': 16780, 'epoch': 3} {'type': 'loss', 'content': 0.09773585945367813, 'timestamp': '2025-09-30 22:32:56.949300', 'step': 16781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:56.980329', 'step': 16781, 'epoch': 3} {'type': 'loss', 'content': 0.06808502972126007, 'timestamp': '2025-09-30 22:32:56.984334', 'step': 16782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:57.017699', 'step': 16782, 'epoch': 3} {'type': 'loss', 'content': 0.07378507405519485, 'timestamp': '2025-09-30 22:32:57.022372', 'step': 16783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.053486', 'step': 16783, 'epoch': 3} {'type': 'loss', 'content': 0.10633480548858643, 'timestamp': '2025-09-30 22:32:57.090620', 'step': 16784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.123154', 'step': 16784, 'epoch': 3} {'type': 'loss', 'content': 0.07475551217794418, 'timestamp': '2025-09-30 22:32:57.132544', 'step': 16785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.163794', 'step': 16785, 'epoch': 3} {'type': 'loss', 'content': 0.07737518101930618, 'timestamp': '2025-09-30 22:32:57.166747', 'step': 16786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.199660', 'step': 16786, 'epoch': 3} {'type': 'loss', 'content': 0.06854844838380814, 'timestamp': '2025-09-30 22:32:57.203074', 'step': 16787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.234707', 'step': 16787, 'epoch': 3} {'type': 'loss', 'content': 0.10047836601734161, 'timestamp': '2025-09-30 22:32:57.260345', 'step': 16788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.292294', 'step': 16788, 'epoch': 3} {'type': 'loss', 'content': 0.07186098396778107, 'timestamp': '2025-09-30 22:32:57.299124', 'step': 16789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.333495', 'step': 16789, 'epoch': 3} {'type': 'loss', 'content': 0.04897606372833252, 'timestamp': '2025-09-30 22:32:57.339380', 'step': 16790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.371872', 'step': 16790, 'epoch': 3} {'type': 'loss', 'content': 0.11083343625068665, 'timestamp': '2025-09-30 22:32:57.374290', 'step': 16791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.406503', 'step': 16791, 'epoch': 3} {'type': 'loss', 'content': 0.05242082476615906, 'timestamp': '2025-09-30 22:32:57.430570', 'step': 16792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.462879', 'step': 16792, 'epoch': 3} {'type': 'loss', 'content': 0.09135067462921143, 'timestamp': '2025-09-30 22:32:57.466618', 'step': 16793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:57.504325', 'step': 16793, 'epoch': 3} {'type': 'loss', 'content': 0.011093090288341045, 'timestamp': '2025-09-30 22:32:57.508250', 'step': 16794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.541719', 'step': 16794, 'epoch': 3} {'type': 'loss', 'content': 0.024451332166790962, 'timestamp': '2025-09-30 22:32:57.544312', 'step': 16795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.576277', 'step': 16795, 'epoch': 3} {'type': 'loss', 'content': 0.04469050467014313, 'timestamp': '2025-09-30 22:32:57.614548', 'step': 16796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:57.647080', 'step': 16796, 'epoch': 3} {'type': 'loss', 'content': 0.09306800365447998, 'timestamp': '2025-09-30 22:32:57.650020', 'step': 16797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.695349', 'step': 16797, 'epoch': 3} {'type': 'loss', 'content': 0.027899503707885742, 'timestamp': '2025-09-30 22:32:57.698145', 'step': 16798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.733205', 'step': 16798, 'epoch': 3} {'type': 'loss', 'content': 0.031006406992673874, 'timestamp': '2025-09-30 22:32:57.742248', 'step': 16799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:57.776174', 'step': 16799, 'epoch': 3} {'type': 'loss', 'content': 0.0501982755959034, 'timestamp': '2025-09-30 22:32:57.802765', 'step': 16800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.838097', 'step': 16800, 'epoch': 3} {'type': 'loss', 'content': 0.07875463366508484, 'timestamp': '2025-09-30 22:32:57.842510', 'step': 16801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.876591', 'step': 16801, 'epoch': 3} {'type': 'loss', 'content': 0.035021331161260605, 'timestamp': '2025-09-30 22:32:57.879331', 'step': 16802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.915581', 'step': 16802, 'epoch': 3} {'type': 'loss', 'content': 0.09377610683441162, 'timestamp': '2025-09-30 22:32:57.928395', 'step': 16803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:57.962805', 'step': 16803, 'epoch': 3} {'type': 'loss', 'content': 0.05481570586562157, 'timestamp': '2025-09-30 22:32:57.990019', 'step': 16804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:58.026870', 'step': 16804, 'epoch': 3} {'type': 'loss', 'content': 0.026257425546646118, 'timestamp': '2025-09-30 22:32:58.031778', 'step': 16805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:58.065186', 'step': 16805, 'epoch': 3} {'type': 'loss', 'content': 0.04862549528479576, 'timestamp': '2025-09-30 22:32:58.067921', 'step': 16806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:58.111487', 'step': 16806, 'epoch': 3} {'type': 'loss', 'content': 0.05016205832362175, 'timestamp': '2025-09-30 22:32:58.114488', 'step': 16807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:58.146719', 'step': 16807, 'epoch': 3} {'type': 'loss', 'content': 0.0813906267285347, 'timestamp': '2025-09-30 22:32:58.171995', 'step': 16808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:58.207062', 'step': 16808, 'epoch': 3} {'type': 'loss', 'content': 0.05202421545982361, 'timestamp': '2025-09-30 22:32:58.210339', 'step': 16809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:58.245068', 'step': 16809, 'epoch': 3} {'type': 'loss', 'content': 0.05252012610435486, 'timestamp': '2025-09-30 22:32:58.247885', 'step': 16810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:58.278466', 'step': 16810, 'epoch': 3} {'type': 'loss', 'content': 0.02831083908677101, 'timestamp': '2025-09-30 22:32:58.282820', 'step': 16811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:58.317045', 'step': 16811, 'epoch': 3} {'type': 'loss', 'content': 0.025271905586123466, 'timestamp': '2025-09-30 22:32:58.342409', 'step': 16812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:58.389338', 'step': 16812, 'epoch': 3} {'type': 'loss', 'content': 0.03508042171597481, 'timestamp': '2025-09-30 22:32:58.393249', 'step': 16813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:58.437321', 'step': 16813, 'epoch': 3} {'type': 'loss', 'content': 0.06023164838552475, 'timestamp': '2025-09-30 22:32:58.440555', 'step': 16814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:58.488380', 'step': 16814, 'epoch': 3} {'type': 'loss', 'content': 0.06934285163879395, 'timestamp': '2025-09-30 22:32:58.491041', 'step': 16815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:58.522425', 'step': 16815, 'epoch': 3} {'type': 'loss', 'content': 0.08344707638025284, 'timestamp': '2025-09-30 22:32:58.558450', 'step': 16816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:58.589153', 'step': 16816, 'epoch': 3} {'type': 'loss', 'content': 0.04269357770681381, 'timestamp': '2025-09-30 22:32:58.591879', 'step': 16817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:58.622124', 'step': 16817, 'epoch': 3} {'type': 'loss', 'content': 0.025486554950475693, 'timestamp': '2025-09-30 22:32:58.631442', 'step': 16818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:58.663991', 'step': 16818, 'epoch': 3} {'type': 'loss', 'content': 0.06793291866779327, 'timestamp': '2025-09-30 22:32:58.666720', 'step': 16819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:58.697495', 'step': 16819, 'epoch': 3} {'type': 'loss', 'content': 0.04129800200462341, 'timestamp': '2025-09-30 22:32:58.721482', 'step': 16820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:58.753624', 'step': 16820, 'epoch': 3} {'type': 'loss', 'content': 0.07382305711507797, 'timestamp': '2025-09-30 22:32:58.764368', 'step': 16821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:58.796671', 'step': 16821, 'epoch': 3} {'type': 'loss', 'content': 0.07789719849824905, 'timestamp': '2025-09-30 22:32:58.799734', 'step': 16822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:58.832537', 'step': 16822, 'epoch': 3} {'type': 'loss', 'content': 0.09129303693771362, 'timestamp': '2025-09-30 22:32:58.849743', 'step': 16823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:58.880446', 'step': 16823, 'epoch': 3} {'type': 'loss', 'content': 0.15145018696784973, 'timestamp': '2025-09-30 22:32:58.904778', 'step': 16824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:58.934776', 'step': 16824, 'epoch': 3} {'type': 'loss', 'content': 0.024340247735381126, 'timestamp': '2025-09-30 22:32:58.940542', 'step': 16825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:58.974308', 'step': 16825, 'epoch': 3} {'type': 'loss', 'content': 0.09155742079019547, 'timestamp': '2025-09-30 22:32:58.976491', 'step': 16826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.007911', 'step': 16826, 'epoch': 3} {'type': 'loss', 'content': 0.12858222424983978, 'timestamp': '2025-09-30 22:32:59.011112', 'step': 16827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.044477', 'step': 16827, 'epoch': 3} {'type': 'loss', 'content': 0.0587204173207283, 'timestamp': '2025-09-30 22:32:59.068518', 'step': 16828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.101448', 'step': 16828, 'epoch': 3} {'type': 'loss', 'content': 0.072732113301754, 'timestamp': '2025-09-30 22:32:59.105999', 'step': 16829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:59.137326', 'step': 16829, 'epoch': 3} {'type': 'loss', 'content': 0.05877412483096123, 'timestamp': '2025-09-30 22:32:59.140465', 'step': 16830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.171041', 'step': 16830, 'epoch': 3} {'type': 'loss', 'content': 0.037551794201135635, 'timestamp': '2025-09-30 22:32:59.174252', 'step': 16831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.204280', 'step': 16831, 'epoch': 3} {'type': 'loss', 'content': 0.06852090358734131, 'timestamp': '2025-09-30 22:32:59.228827', 'step': 16832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.258610', 'step': 16832, 'epoch': 3} {'type': 'loss', 'content': 0.03420476242899895, 'timestamp': '2025-09-30 22:32:59.262911', 'step': 16833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.307729', 'step': 16833, 'epoch': 3} {'type': 'loss', 'content': 0.07011328637599945, 'timestamp': '2025-09-30 22:32:59.310026', 'step': 16834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:32:59.342598', 'step': 16834, 'epoch': 3} {'type': 'loss', 'content': 0.09839355200529099, 'timestamp': '2025-09-30 22:32:59.346139', 'step': 16835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:59.377834', 'step': 16835, 'epoch': 3} {'type': 'loss', 'content': 0.08436967432498932, 'timestamp': '2025-09-30 22:32:59.402019', 'step': 16836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.435674', 'step': 16836, 'epoch': 3} {'type': 'loss', 'content': 0.09519059211015701, 'timestamp': '2025-09-30 22:32:59.437683', 'step': 16837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:59.473539', 'step': 16837, 'epoch': 3} {'type': 'loss', 'content': 0.09362170845270157, 'timestamp': '2025-09-30 22:32:59.476554', 'step': 16838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.507849', 'step': 16838, 'epoch': 3} {'type': 'loss', 'content': 0.08955100923776627, 'timestamp': '2025-09-30 22:32:59.510237', 'step': 16839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.543031', 'step': 16839, 'epoch': 3} {'type': 'loss', 'content': 0.04609578475356102, 'timestamp': '2025-09-30 22:32:59.569122', 'step': 16840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.599849', 'step': 16840, 'epoch': 3} {'type': 'loss', 'content': 0.10678007453680038, 'timestamp': '2025-09-30 22:32:59.603235', 'step': 16841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.634867', 'step': 16841, 'epoch': 3} {'type': 'loss', 'content': 0.0419338196516037, 'timestamp': '2025-09-30 22:32:59.653549', 'step': 16842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:32:59.691757', 'step': 16842, 'epoch': 3} {'type': 'loss', 'content': 0.044037044048309326, 'timestamp': '2025-09-30 22:32:59.694935', 'step': 16843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:32:59.729879', 'step': 16843, 'epoch': 3} {'type': 'loss', 'content': 0.1027059331536293, 'timestamp': '2025-09-30 22:32:59.760696', 'step': 16844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.791992', 'step': 16844, 'epoch': 3} {'type': 'loss', 'content': 0.1143827959895134, 'timestamp': '2025-09-30 22:32:59.794345', 'step': 16845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:32:59.824783', 'step': 16845, 'epoch': 3} {'type': 'loss', 'content': 0.10209671407938004, 'timestamp': '2025-09-30 22:32:59.828962', 'step': 16846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:32:59.868724', 'step': 16846, 'epoch': 3} {'type': 'loss', 'content': 0.02172059379518032, 'timestamp': '2025-09-30 22:32:59.872122', 'step': 16847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.901965', 'step': 16847, 'epoch': 3} {'type': 'loss', 'content': 0.059549324214458466, 'timestamp': '2025-09-30 22:32:59.925623', 'step': 16848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.958520', 'step': 16848, 'epoch': 3} {'type': 'loss', 'content': 0.06539085507392883, 'timestamp': '2025-09-30 22:32:59.961312', 'step': 16849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:32:59.991845', 'step': 16849, 'epoch': 3} {'type': 'loss', 'content': 0.0332205593585968, 'timestamp': '2025-09-30 22:32:59.994714', 'step': 16850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:00.038167', 'step': 16850, 'epoch': 3} {'type': 'loss', 'content': 0.055062245577573776, 'timestamp': '2025-09-30 22:33:00.054125', 'step': 16851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.087017', 'step': 16851, 'epoch': 3} {'type': 'loss', 'content': 0.06417957693338394, 'timestamp': '2025-09-30 22:33:00.111402', 'step': 16852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.148167', 'step': 16852, 'epoch': 3} {'type': 'loss', 'content': 0.09334444254636765, 'timestamp': '2025-09-30 22:33:00.150944', 'step': 16853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.189290', 'step': 16853, 'epoch': 3} {'type': 'loss', 'content': 0.06305644661188126, 'timestamp': '2025-09-30 22:33:00.191929', 'step': 16854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:00.242380', 'step': 16854, 'epoch': 3} {'type': 'loss', 'content': 0.08450117707252502, 'timestamp': '2025-09-30 22:33:00.245389', 'step': 16855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.276934', 'step': 16855, 'epoch': 3} {'type': 'loss', 'content': 0.12959551811218262, 'timestamp': '2025-09-30 22:33:00.300576', 'step': 16856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:00.334235', 'step': 16856, 'epoch': 3} {'type': 'loss', 'content': 0.03887219727039337, 'timestamp': '2025-09-30 22:33:00.347011', 'step': 16857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.380871', 'step': 16857, 'epoch': 3} {'type': 'loss', 'content': 0.10848663002252579, 'timestamp': '2025-09-30 22:33:00.383833', 'step': 16858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.422616', 'step': 16858, 'epoch': 3} {'type': 'loss', 'content': 0.1089826300740242, 'timestamp': '2025-09-30 22:33:00.424722', 'step': 16859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.466911', 'step': 16859, 'epoch': 3} {'type': 'loss', 'content': 0.06536462903022766, 'timestamp': '2025-09-30 22:33:00.491018', 'step': 16860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.521638', 'step': 16860, 'epoch': 3} {'type': 'loss', 'content': 0.11261007934808731, 'timestamp': '2025-09-30 22:33:00.526441', 'step': 16861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:00.558065', 'step': 16861, 'epoch': 3} {'type': 'loss', 'content': 0.0793004259467125, 'timestamp': '2025-09-30 22:33:00.561590', 'step': 16862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.595881', 'step': 16862, 'epoch': 3} {'type': 'loss', 'content': 0.05105503275990486, 'timestamp': '2025-09-30 22:33:00.599120', 'step': 16863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.631812', 'step': 16863, 'epoch': 3} {'type': 'loss', 'content': 0.1622042953968048, 'timestamp': '2025-09-30 22:33:00.656811', 'step': 16864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.687108', 'step': 16864, 'epoch': 3} {'type': 'loss', 'content': 0.17006424069404602, 'timestamp': '2025-09-30 22:33:00.690052', 'step': 16865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.722377', 'step': 16865, 'epoch': 3} {'type': 'loss', 'content': 0.08052700012922287, 'timestamp': '2025-09-30 22:33:00.730467', 'step': 16866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.771059', 'step': 16866, 'epoch': 3} {'type': 'loss', 'content': 0.06982563436031342, 'timestamp': '2025-09-30 22:33:00.784574', 'step': 16867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:00.817787', 'step': 16867, 'epoch': 3} {'type': 'loss', 'content': 0.06399880349636078, 'timestamp': '2025-09-30 22:33:00.852005', 'step': 16868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:00.882494', 'step': 16868, 'epoch': 3} {'type': 'loss', 'content': 0.24953407049179077, 'timestamp': '2025-09-30 22:33:00.885891', 'step': 16869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:00.915817', 'step': 16869, 'epoch': 3} {'type': 'loss', 'content': 0.08167966455221176, 'timestamp': '2025-09-30 22:33:00.918995', 'step': 16870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:00.950712', 'step': 16870, 'epoch': 3} {'type': 'loss', 'content': 0.1107100248336792, 'timestamp': '2025-09-30 22:33:00.955853', 'step': 16871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:00.989331', 'step': 16871, 'epoch': 3} {'type': 'loss', 'content': 0.04241478070616722, 'timestamp': '2025-09-30 22:33:01.015018', 'step': 16872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:01.052907', 'step': 16872, 'epoch': 3} {'type': 'loss', 'content': 0.04633947089314461, 'timestamp': '2025-09-30 22:33:01.055909', 'step': 16873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.092942', 'step': 16873, 'epoch': 3} {'type': 'loss', 'content': 0.04244677349925041, 'timestamp': '2025-09-30 22:33:01.097635', 'step': 16874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:01.139632', 'step': 16874, 'epoch': 3} {'type': 'loss', 'content': 0.07687491923570633, 'timestamp': '2025-09-30 22:33:01.145977', 'step': 16875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:01.178891', 'step': 16875, 'epoch': 3} {'type': 'loss', 'content': 0.051181577146053314, 'timestamp': '2025-09-30 22:33:01.206137', 'step': 16876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.236524', 'step': 16876, 'epoch': 3} {'type': 'loss', 'content': 0.08844425529241562, 'timestamp': '2025-09-30 22:33:01.247457', 'step': 16877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.282592', 'step': 16877, 'epoch': 3} {'type': 'loss', 'content': 0.05992715060710907, 'timestamp': '2025-09-30 22:33:01.287119', 'step': 16878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:01.319319', 'step': 16878, 'epoch': 3} {'type': 'loss', 'content': 0.07286881655454636, 'timestamp': '2025-09-30 22:33:01.324082', 'step': 16879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:01.372791', 'step': 16879, 'epoch': 3} {'type': 'loss', 'content': 0.07803162932395935, 'timestamp': '2025-09-30 22:33:01.396535', 'step': 16880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.435706', 'step': 16880, 'epoch': 3} {'type': 'loss', 'content': 0.03648656606674194, 'timestamp': '2025-09-30 22:33:01.450569', 'step': 16881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:01.482604', 'step': 16881, 'epoch': 3} {'type': 'loss', 'content': 0.06202996149659157, 'timestamp': '2025-09-30 22:33:01.485944', 'step': 16882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.520876', 'step': 16882, 'epoch': 3} {'type': 'loss', 'content': 0.058983612805604935, 'timestamp': '2025-09-30 22:33:01.530474', 'step': 16883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:01.565560', 'step': 16883, 'epoch': 3} {'type': 'loss', 'content': 0.049348194152116776, 'timestamp': '2025-09-30 22:33:01.591693', 'step': 16884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:01.624599', 'step': 16884, 'epoch': 3} {'type': 'loss', 'content': 0.030153678730130196, 'timestamp': '2025-09-30 22:33:01.631106', 'step': 16885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:01.676994', 'step': 16885, 'epoch': 3} {'type': 'loss', 'content': 0.09191480278968811, 'timestamp': '2025-09-30 22:33:01.681920', 'step': 16886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:01.732520', 'step': 16886, 'epoch': 3} {'type': 'loss', 'content': 0.16076557338237762, 'timestamp': '2025-09-30 22:33:01.748367', 'step': 16887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:01.781208', 'step': 16887, 'epoch': 3} {'type': 'loss', 'content': 0.0779883936047554, 'timestamp': '2025-09-30 22:33:01.806396', 'step': 16888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:01.851077', 'step': 16888, 'epoch': 3} {'type': 'loss', 'content': 0.06146831437945366, 'timestamp': '2025-09-30 22:33:01.873887', 'step': 16889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.916170', 'step': 16889, 'epoch': 3} {'type': 'loss', 'content': 0.07326571643352509, 'timestamp': '2025-09-30 22:33:01.920308', 'step': 16890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:01.981863', 'step': 16890, 'epoch': 3} {'type': 'loss', 'content': 0.07642503082752228, 'timestamp': '2025-09-30 22:33:01.986669', 'step': 16891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.024919', 'step': 16891, 'epoch': 3} {'type': 'loss', 'content': 0.07825781404972076, 'timestamp': '2025-09-30 22:33:02.049931', 'step': 16892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.091528', 'step': 16892, 'epoch': 3} {'type': 'loss', 'content': 0.06725820899009705, 'timestamp': '2025-09-30 22:33:02.103504', 'step': 16893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:02.140172', 'step': 16893, 'epoch': 3} {'type': 'loss', 'content': 0.06330561637878418, 'timestamp': '2025-09-30 22:33:02.146081', 'step': 16894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:02.180593', 'step': 16894, 'epoch': 3} {'type': 'loss', 'content': 0.06983673572540283, 'timestamp': '2025-09-30 22:33:02.185082', 'step': 16895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:02.222673', 'step': 16895, 'epoch': 3} {'type': 'loss', 'content': 0.05426444485783577, 'timestamp': '2025-09-30 22:33:02.246790', 'step': 16896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:02.279042', 'step': 16896, 'epoch': 3} {'type': 'loss', 'content': 0.03397882357239723, 'timestamp': '2025-09-30 22:33:02.292286', 'step': 16897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.330345', 'step': 16897, 'epoch': 3} {'type': 'loss', 'content': 0.064796082675457, 'timestamp': '2025-09-30 22:33:02.335980', 'step': 16898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:02.377430', 'step': 16898, 'epoch': 3} {'type': 'loss', 'content': 0.057107698172330856, 'timestamp': '2025-09-30 22:33:02.382399', 'step': 16899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.435762', 'step': 16899, 'epoch': 3} {'type': 'loss', 'content': 0.06781227141618729, 'timestamp': '2025-09-30 22:33:02.461322', 'step': 16900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.498624', 'step': 16900, 'epoch': 3} {'type': 'loss', 'content': 0.09240485727787018, 'timestamp': '2025-09-30 22:33:02.510264', 'step': 16901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:02.543654', 'step': 16901, 'epoch': 3} {'type': 'loss', 'content': 0.15841838717460632, 'timestamp': '2025-09-30 22:33:02.546521', 'step': 16902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:02.578996', 'step': 16902, 'epoch': 3} {'type': 'loss', 'content': 0.04694391414523125, 'timestamp': '2025-09-30 22:33:02.586832', 'step': 16903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:02.621668', 'step': 16903, 'epoch': 3} {'type': 'loss', 'content': 0.05131279677152634, 'timestamp': '2025-09-30 22:33:02.649713', 'step': 16904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.680961', 'step': 16904, 'epoch': 3} {'type': 'loss', 'content': 0.058021944016218185, 'timestamp': '2025-09-30 22:33:02.685047', 'step': 16905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:02.715394', 'step': 16905, 'epoch': 3} {'type': 'loss', 'content': 0.04347715899348259, 'timestamp': '2025-09-30 22:33:02.722959', 'step': 16906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.762761', 'step': 16906, 'epoch': 3} {'type': 'loss', 'content': 0.02295275591313839, 'timestamp': '2025-09-30 22:33:02.766376', 'step': 16907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:02.806109', 'step': 16907, 'epoch': 3} {'type': 'loss', 'content': 0.076052725315094, 'timestamp': '2025-09-30 22:33:02.830339', 'step': 16908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:02.866678', 'step': 16908, 'epoch': 3} {'type': 'loss', 'content': 0.13497014343738556, 'timestamp': '2025-09-30 22:33:02.879154', 'step': 16909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:02.928057', 'step': 16909, 'epoch': 3} {'type': 'loss', 'content': 0.046725910156965256, 'timestamp': '2025-09-30 22:33:02.930812', 'step': 16910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:02.961289', 'step': 16910, 'epoch': 3} {'type': 'loss', 'content': 0.06571739912033081, 'timestamp': '2025-09-30 22:33:02.968052', 'step': 16911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:03.004242', 'step': 16911, 'epoch': 3} {'type': 'loss', 'content': 0.03860727697610855, 'timestamp': '2025-09-30 22:33:03.028650', 'step': 16912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:03.059028', 'step': 16912, 'epoch': 3} {'type': 'loss', 'content': 0.013035415671765804, 'timestamp': '2025-09-30 22:33:03.068711', 'step': 16913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:03.098819', 'step': 16913, 'epoch': 3} {'type': 'loss', 'content': 0.05758661404252052, 'timestamp': '2025-09-30 22:33:03.101287', 'step': 16914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:03.137173', 'step': 16914, 'epoch': 3} {'type': 'loss', 'content': 0.09931698441505432, 'timestamp': '2025-09-30 22:33:03.142905', 'step': 16915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:03.184718', 'step': 16915, 'epoch': 3} {'type': 'loss', 'content': 0.07374458014965057, 'timestamp': '2025-09-30 22:33:03.210710', 'step': 16916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:03.246468', 'step': 16916, 'epoch': 3} {'type': 'loss', 'content': 0.11875991523265839, 'timestamp': '2025-09-30 22:33:03.250389', 'step': 16917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:03.283149', 'step': 16917, 'epoch': 3} {'type': 'loss', 'content': 0.02752455323934555, 'timestamp': '2025-09-30 22:33:03.286907', 'step': 16918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.320385', 'step': 16918, 'epoch': 3} {'type': 'loss', 'content': 0.028777755796909332, 'timestamp': '2025-09-30 22:33:03.324772', 'step': 16919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:03.357748', 'step': 16919, 'epoch': 3} {'type': 'loss', 'content': 0.04237402603030205, 'timestamp': '2025-09-30 22:33:03.384354', 'step': 16920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.418024', 'step': 16920, 'epoch': 3} {'type': 'loss', 'content': 0.018071373924613, 'timestamp': '2025-09-30 22:33:03.422308', 'step': 16921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:03.462730', 'step': 16921, 'epoch': 3} {'type': 'loss', 'content': 0.08500021696090698, 'timestamp': '2025-09-30 22:33:03.467840', 'step': 16922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.500411', 'step': 16922, 'epoch': 3} {'type': 'loss', 'content': 0.10875027626752853, 'timestamp': '2025-09-30 22:33:03.503763', 'step': 16923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:03.541838', 'step': 16923, 'epoch': 3} {'type': 'loss', 'content': 0.07067079097032547, 'timestamp': '2025-09-30 22:33:03.570516', 'step': 16924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:03.602019', 'step': 16924, 'epoch': 3} {'type': 'loss', 'content': 0.03905114531517029, 'timestamp': '2025-09-30 22:33:03.606116', 'step': 16925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.650764', 'step': 16925, 'epoch': 3} {'type': 'loss', 'content': 0.05123266577720642, 'timestamp': '2025-09-30 22:33:03.655472', 'step': 16926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:03.689699', 'step': 16926, 'epoch': 3} {'type': 'loss', 'content': 0.075458824634552, 'timestamp': '2025-09-30 22:33:03.692787', 'step': 16927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:03.729118', 'step': 16927, 'epoch': 3} {'type': 'loss', 'content': 0.09308397769927979, 'timestamp': '2025-09-30 22:33:03.754284', 'step': 16928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.784852', 'step': 16928, 'epoch': 3} {'type': 'loss', 'content': 0.04984722286462784, 'timestamp': '2025-09-30 22:33:03.790162', 'step': 16929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.846711', 'step': 16929, 'epoch': 3} {'type': 'loss', 'content': 0.12320630252361298, 'timestamp': '2025-09-30 22:33:03.860677', 'step': 16930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:03.897842', 'step': 16930, 'epoch': 3} {'type': 'loss', 'content': 0.04278259724378586, 'timestamp': '2025-09-30 22:33:03.914444', 'step': 16931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:03.949693', 'step': 16931, 'epoch': 3} {'type': 'loss', 'content': 0.05136681720614433, 'timestamp': '2025-09-30 22:33:03.974076', 'step': 16932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:04.006859', 'step': 16932, 'epoch': 3} {'type': 'loss', 'content': 0.11838585138320923, 'timestamp': '2025-09-30 22:33:04.023133', 'step': 16933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:04.058345', 'step': 16933, 'epoch': 3} {'type': 'loss', 'content': 0.06065051257610321, 'timestamp': '2025-09-30 22:33:04.062784', 'step': 16934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:04.093027', 'step': 16934, 'epoch': 3} {'type': 'loss', 'content': 0.082741379737854, 'timestamp': '2025-09-30 22:33:04.096408', 'step': 16935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:04.129297', 'step': 16935, 'epoch': 3} {'type': 'loss', 'content': 0.030869770795106888, 'timestamp': '2025-09-30 22:33:04.153835', 'step': 16936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:04.184578', 'step': 16936, 'epoch': 3} {'type': 'loss', 'content': 0.052385058254003525, 'timestamp': '2025-09-30 22:33:04.199715', 'step': 16937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:04.231771', 'step': 16937, 'epoch': 3} {'type': 'loss', 'content': 0.09163474291563034, 'timestamp': '2025-09-30 22:33:04.237043', 'step': 16938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.273006', 'step': 16938, 'epoch': 3} {'type': 'loss', 'content': 0.0388028621673584, 'timestamp': '2025-09-30 22:33:04.276021', 'step': 16939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.306407', 'step': 16939, 'epoch': 3} {'type': 'loss', 'content': 0.1421789675951004, 'timestamp': '2025-09-30 22:33:04.330727', 'step': 16940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.360560', 'step': 16940, 'epoch': 3} {'type': 'loss', 'content': 0.05448899790644646, 'timestamp': '2025-09-30 22:33:04.363797', 'step': 16941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.410927', 'step': 16941, 'epoch': 3} {'type': 'loss', 'content': 0.08425529301166534, 'timestamp': '2025-09-30 22:33:04.421165', 'step': 16942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.462185', 'step': 16942, 'epoch': 3} {'type': 'loss', 'content': 0.08598224818706512, 'timestamp': '2025-09-30 22:33:04.466282', 'step': 16943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:04.500328', 'step': 16943, 'epoch': 3} {'type': 'loss', 'content': 0.07831048220396042, 'timestamp': '2025-09-30 22:33:04.528795', 'step': 16944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:04.561770', 'step': 16944, 'epoch': 3} {'type': 'loss', 'content': 0.08864167332649231, 'timestamp': '2025-09-30 22:33:04.565976', 'step': 16945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:04.596835', 'step': 16945, 'epoch': 3} {'type': 'loss', 'content': 0.062111884355545044, 'timestamp': '2025-09-30 22:33:04.605544', 'step': 16946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.641532', 'step': 16946, 'epoch': 3} {'type': 'loss', 'content': 0.05208700895309448, 'timestamp': '2025-09-30 22:33:04.645976', 'step': 16947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.687330', 'step': 16947, 'epoch': 3} {'type': 'loss', 'content': 0.05989895761013031, 'timestamp': '2025-09-30 22:33:04.717175', 'step': 16948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.753118', 'step': 16948, 'epoch': 3} {'type': 'loss', 'content': 0.05197524651885033, 'timestamp': '2025-09-30 22:33:04.758662', 'step': 16949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.792744', 'step': 16949, 'epoch': 3} {'type': 'loss', 'content': 0.06199406087398529, 'timestamp': '2025-09-30 22:33:04.798133', 'step': 16950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:04.830714', 'step': 16950, 'epoch': 3} {'type': 'loss', 'content': 0.01394939050078392, 'timestamp': '2025-09-30 22:33:04.840995', 'step': 16951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:04.873338', 'step': 16951, 'epoch': 3} {'type': 'loss', 'content': 0.06154480576515198, 'timestamp': '2025-09-30 22:33:04.899460', 'step': 16952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:04.960289', 'step': 16952, 'epoch': 3} {'type': 'loss', 'content': 0.12624116241931915, 'timestamp': '2025-09-30 22:33:04.964802', 'step': 16953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:04.996918', 'step': 16953, 'epoch': 3} {'type': 'loss', 'content': 0.10319245606660843, 'timestamp': '2025-09-30 22:33:05.009000', 'step': 16954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:05.047214', 'step': 16954, 'epoch': 3} {'type': 'loss', 'content': 0.05626341328024864, 'timestamp': '2025-09-30 22:33:05.056753', 'step': 16955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:05.088302', 'step': 16955, 'epoch': 3} {'type': 'loss', 'content': 0.12930184602737427, 'timestamp': '2025-09-30 22:33:05.113598', 'step': 16956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.144509', 'step': 16956, 'epoch': 3} {'type': 'loss', 'content': 0.06482096761465073, 'timestamp': '2025-09-30 22:33:05.153036', 'step': 16957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.189440', 'step': 16957, 'epoch': 3} {'type': 'loss', 'content': 0.08504936844110489, 'timestamp': '2025-09-30 22:33:05.194153', 'step': 16958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.227446', 'step': 16958, 'epoch': 3} {'type': 'loss', 'content': 0.08512452244758606, 'timestamp': '2025-09-30 22:33:05.232027', 'step': 16959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.264030', 'step': 16959, 'epoch': 3} {'type': 'loss', 'content': 0.06512634456157684, 'timestamp': '2025-09-30 22:33:05.303995', 'step': 16960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.340643', 'step': 16960, 'epoch': 3} {'type': 'loss', 'content': 0.14948393404483795, 'timestamp': '2025-09-30 22:33:05.346398', 'step': 16961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.377623', 'step': 16961, 'epoch': 3} {'type': 'loss', 'content': 0.03899610415101051, 'timestamp': '2025-09-30 22:33:05.383339', 'step': 16962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.428844', 'step': 16962, 'epoch': 3} {'type': 'loss', 'content': 0.06074307858943939, 'timestamp': '2025-09-30 22:33:05.431891', 'step': 16963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:05.470578', 'step': 16963, 'epoch': 3} {'type': 'loss', 'content': 0.043348994106054306, 'timestamp': '2025-09-30 22:33:05.497648', 'step': 16964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:05.530973', 'step': 16964, 'epoch': 3} {'type': 'loss', 'content': 0.06308845430612564, 'timestamp': '2025-09-30 22:33:05.537307', 'step': 16965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.569759', 'step': 16965, 'epoch': 3} {'type': 'loss', 'content': 0.04442537948489189, 'timestamp': '2025-09-30 22:33:05.576242', 'step': 16966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.609703', 'step': 16966, 'epoch': 3} {'type': 'loss', 'content': 0.10568299144506454, 'timestamp': '2025-09-30 22:33:05.617247', 'step': 16967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.654606', 'step': 16967, 'epoch': 3} {'type': 'loss', 'content': 0.07376939803361893, 'timestamp': '2025-09-30 22:33:05.685433', 'step': 16968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.715637', 'step': 16968, 'epoch': 3} {'type': 'loss', 'content': 0.04944588616490364, 'timestamp': '2025-09-30 22:33:05.722874', 'step': 16969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.753340', 'step': 16969, 'epoch': 3} {'type': 'loss', 'content': 0.07085879147052765, 'timestamp': '2025-09-30 22:33:05.757098', 'step': 16970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.787692', 'step': 16970, 'epoch': 3} {'type': 'loss', 'content': 0.04655542969703674, 'timestamp': '2025-09-30 22:33:05.792058', 'step': 16971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:05.823324', 'step': 16971, 'epoch': 3} {'type': 'loss', 'content': 0.04145631566643715, 'timestamp': '2025-09-30 22:33:05.850045', 'step': 16972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:05.888545', 'step': 16972, 'epoch': 3} {'type': 'loss', 'content': 0.18102554976940155, 'timestamp': '2025-09-30 22:33:05.905586', 'step': 16973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:05.946277', 'step': 16973, 'epoch': 3} {'type': 'loss', 'content': 0.0840294361114502, 'timestamp': '2025-09-30 22:33:05.948995', 'step': 16974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:05.980331', 'step': 16974, 'epoch': 3} {'type': 'loss', 'content': 0.03571334108710289, 'timestamp': '2025-09-30 22:33:05.983305', 'step': 16975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.014008', 'step': 16975, 'epoch': 3} {'type': 'loss', 'content': 0.05464012920856476, 'timestamp': '2025-09-30 22:33:06.040499', 'step': 16976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:06.071988', 'step': 16976, 'epoch': 3} {'type': 'loss', 'content': 0.02262652851641178, 'timestamp': '2025-09-30 22:33:06.076047', 'step': 16977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:06.109621', 'step': 16977, 'epoch': 3} {'type': 'loss', 'content': 0.1481265127658844, 'timestamp': '2025-09-30 22:33:06.120361', 'step': 16978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.160114', 'step': 16978, 'epoch': 3} {'type': 'loss', 'content': 0.03584560379385948, 'timestamp': '2025-09-30 22:33:06.163883', 'step': 16979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.198757', 'step': 16979, 'epoch': 3} {'type': 'loss', 'content': 0.07789785414934158, 'timestamp': '2025-09-30 22:33:06.228610', 'step': 16980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:06.259114', 'step': 16980, 'epoch': 3} {'type': 'loss', 'content': 0.055120643228292465, 'timestamp': '2025-09-30 22:33:06.263926', 'step': 16981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:06.295018', 'step': 16981, 'epoch': 3} {'type': 'loss', 'content': 0.08436474204063416, 'timestamp': '2025-09-30 22:33:06.300185', 'step': 16982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:06.331778', 'step': 16982, 'epoch': 3} {'type': 'loss', 'content': 0.07467774301767349, 'timestamp': '2025-09-30 22:33:06.336596', 'step': 16983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.380318', 'step': 16983, 'epoch': 3} {'type': 'loss', 'content': 0.13538123667240143, 'timestamp': '2025-09-30 22:33:06.405499', 'step': 16984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:06.436628', 'step': 16984, 'epoch': 3} {'type': 'loss', 'content': 0.08914024382829666, 'timestamp': '2025-09-30 22:33:06.441049', 'step': 16985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:06.473162', 'step': 16985, 'epoch': 3} {'type': 'loss', 'content': 0.04148925468325615, 'timestamp': '2025-09-30 22:33:06.477599', 'step': 16986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:06.510351', 'step': 16986, 'epoch': 3} {'type': 'loss', 'content': 0.08051466941833496, 'timestamp': '2025-09-30 22:33:06.515039', 'step': 16987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.549668', 'step': 16987, 'epoch': 3} {'type': 'loss', 'content': 0.07939200103282928, 'timestamp': '2025-09-30 22:33:06.587176', 'step': 16988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.625539', 'step': 16988, 'epoch': 3} {'type': 'loss', 'content': 0.04436483234167099, 'timestamp': '2025-09-30 22:33:06.628568', 'step': 16989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:06.660033', 'step': 16989, 'epoch': 3} {'type': 'loss', 'content': 0.06591597944498062, 'timestamp': '2025-09-30 22:33:06.663715', 'step': 16990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:06.708824', 'step': 16990, 'epoch': 3} {'type': 'loss', 'content': 0.11476218700408936, 'timestamp': '2025-09-30 22:33:06.713120', 'step': 16991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:06.747173', 'step': 16991, 'epoch': 3} {'type': 'loss', 'content': 0.05049890652298927, 'timestamp': '2025-09-30 22:33:06.772870', 'step': 16992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:06.814891', 'step': 16992, 'epoch': 3} {'type': 'loss', 'content': 0.05911387875676155, 'timestamp': '2025-09-30 22:33:06.819924', 'step': 16993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:06.855338', 'step': 16993, 'epoch': 3} {'type': 'loss', 'content': 0.07528278231620789, 'timestamp': '2025-09-30 22:33:06.859110', 'step': 16994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:06.890786', 'step': 16994, 'epoch': 3} {'type': 'loss', 'content': 0.05119462311267853, 'timestamp': '2025-09-30 22:33:06.901338', 'step': 16995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:06.932899', 'step': 16995, 'epoch': 3} {'type': 'loss', 'content': 0.055499497801065445, 'timestamp': '2025-09-30 22:33:06.963696', 'step': 16996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:07.007890', 'step': 16996, 'epoch': 3} {'type': 'loss', 'content': 0.10323604941368103, 'timestamp': '2025-09-30 22:33:07.012778', 'step': 16997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:07.043925', 'step': 16997, 'epoch': 3} {'type': 'loss', 'content': 0.1343996375799179, 'timestamp': '2025-09-30 22:33:07.049329', 'step': 16998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:07.081359', 'step': 16998, 'epoch': 3} {'type': 'loss', 'content': 0.10774817317724228, 'timestamp': '2025-09-30 22:33:07.085799', 'step': 16999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:07.117988', 'step': 16999, 'epoch': 3} {'type': 'loss', 'content': 0.09390215575695038, 'timestamp': '2025-09-30 22:33:07.142205', 'step': 17000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17000', 'timestamp': '2025-09-30 22:33:12.255193', 'step': 17000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:12.298270', 'step': 17000, 'epoch': 3} {'type': 'loss', 'content': 0.07713248580694199, 'timestamp': '2025-09-30 22:33:12.301461', 'step': 17001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:12.337778', 'step': 17001, 'epoch': 3} {'type': 'loss', 'content': 0.080954909324646, 'timestamp': '2025-09-30 22:33:12.341722', 'step': 17002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.372271', 'step': 17002, 'epoch': 3} {'type': 'loss', 'content': 0.11428720504045486, 'timestamp': '2025-09-30 22:33:12.376660', 'step': 17003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.408456', 'step': 17003, 'epoch': 3} {'type': 'loss', 'content': 0.0678284540772438, 'timestamp': '2025-09-30 22:33:12.433477', 'step': 17004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:12.464167', 'step': 17004, 'epoch': 3} {'type': 'loss', 'content': 0.042866192758083344, 'timestamp': '2025-09-30 22:33:12.474674', 'step': 17005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:12.508210', 'step': 17005, 'epoch': 3} {'type': 'loss', 'content': 0.03941021487116814, 'timestamp': '2025-09-30 22:33:12.513453', 'step': 17006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.546265', 'step': 17006, 'epoch': 3} {'type': 'loss', 'content': 0.09798721224069595, 'timestamp': '2025-09-30 22:33:12.551508', 'step': 17007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:12.584726', 'step': 17007, 'epoch': 3} {'type': 'loss', 'content': 0.051625244319438934, 'timestamp': '2025-09-30 22:33:12.611764', 'step': 17008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.650244', 'step': 17008, 'epoch': 3} {'type': 'loss', 'content': 0.058195848017930984, 'timestamp': '2025-09-30 22:33:12.652911', 'step': 17009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.687596', 'step': 17009, 'epoch': 3} {'type': 'loss', 'content': 0.02245851419866085, 'timestamp': '2025-09-30 22:33:12.690496', 'step': 17010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.725380', 'step': 17010, 'epoch': 3} {'type': 'loss', 'content': 0.09323114156723022, 'timestamp': '2025-09-30 22:33:12.728701', 'step': 17011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.761137', 'step': 17011, 'epoch': 3} {'type': 'loss', 'content': 0.08153161406517029, 'timestamp': '2025-09-30 22:33:12.785744', 'step': 17012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:12.818710', 'step': 17012, 'epoch': 3} {'type': 'loss', 'content': 0.06447015702724457, 'timestamp': '2025-09-30 22:33:12.823497', 'step': 17013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.855682', 'step': 17013, 'epoch': 3} {'type': 'loss', 'content': 0.1646980494260788, 'timestamp': '2025-09-30 22:33:12.860316', 'step': 17014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.891541', 'step': 17014, 'epoch': 3} {'type': 'loss', 'content': 0.04271622747182846, 'timestamp': '2025-09-30 22:33:12.896222', 'step': 17015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:12.929939', 'step': 17015, 'epoch': 3} {'type': 'loss', 'content': 0.055112313479185104, 'timestamp': '2025-09-30 22:33:12.955313', 'step': 17016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:12.986766', 'step': 17016, 'epoch': 3} {'type': 'loss', 'content': 0.1145678460597992, 'timestamp': '2025-09-30 22:33:12.990824', 'step': 17017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:13.021662', 'step': 17017, 'epoch': 3} {'type': 'loss', 'content': 0.14232847094535828, 'timestamp': '2025-09-30 22:33:13.024499', 'step': 17018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:13.058172', 'step': 17018, 'epoch': 3} {'type': 'loss', 'content': 0.09073697030544281, 'timestamp': '2025-09-30 22:33:13.065181', 'step': 17019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:13.102171', 'step': 17019, 'epoch': 3} {'type': 'loss', 'content': 0.130403071641922, 'timestamp': '2025-09-30 22:33:13.128214', 'step': 17020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:13.165005', 'step': 17020, 'epoch': 3} {'type': 'loss', 'content': 0.08833304792642593, 'timestamp': '2025-09-30 22:33:13.170611', 'step': 17021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:13.205001', 'step': 17021, 'epoch': 3} {'type': 'loss', 'content': 0.10854141414165497, 'timestamp': '2025-09-30 22:33:13.207962', 'step': 17022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:13.244250', 'step': 17022, 'epoch': 3} {'type': 'loss', 'content': 0.06956980377435684, 'timestamp': '2025-09-30 22:33:13.247496', 'step': 17023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:13.287089', 'step': 17023, 'epoch': 3} {'type': 'loss', 'content': 0.04556180164217949, 'timestamp': '2025-09-30 22:33:13.323560', 'step': 17024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:13.355104', 'step': 17024, 'epoch': 3} {'type': 'loss', 'content': 0.08590345829725266, 'timestamp': '2025-09-30 22:33:13.358024', 'step': 17025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:13.389962', 'step': 17025, 'epoch': 3} {'type': 'loss', 'content': 0.17838311195373535, 'timestamp': '2025-09-30 22:33:13.404999', 'step': 17026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:13.437165', 'step': 17026, 'epoch': 3} {'type': 'loss', 'content': 0.11026450991630554, 'timestamp': '2025-09-30 22:33:13.449944', 'step': 17027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:13.482607', 'step': 17027, 'epoch': 3} {'type': 'loss', 'content': 0.06216833367943764, 'timestamp': '2025-09-30 22:33:13.512186', 'step': 17028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:13.557209', 'step': 17028, 'epoch': 3} {'type': 'loss', 'content': 0.05732934549450874, 'timestamp': '2025-09-30 22:33:13.564122', 'step': 17029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:13.597112', 'step': 17029, 'epoch': 3} {'type': 'loss', 'content': 0.0802735909819603, 'timestamp': '2025-09-30 22:33:13.604423', 'step': 17030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:13.637564', 'step': 17030, 'epoch': 3} {'type': 'loss', 'content': 0.12083320319652557, 'timestamp': '2025-09-30 22:33:13.642133', 'step': 17031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:13.684942', 'step': 17031, 'epoch': 3} {'type': 'loss', 'content': 0.07326075434684753, 'timestamp': '2025-09-30 22:33:13.720139', 'step': 17032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:13.751168', 'step': 17032, 'epoch': 3} {'type': 'loss', 'content': 0.08976811170578003, 'timestamp': '2025-09-30 22:33:13.754119', 'step': 17033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:13.791511', 'step': 17033, 'epoch': 3} {'type': 'loss', 'content': 0.13094894587993622, 'timestamp': '2025-09-30 22:33:13.800667', 'step': 17034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:13.843628', 'step': 17034, 'epoch': 3} {'type': 'loss', 'content': 0.0946066677570343, 'timestamp': '2025-09-30 22:33:13.849568', 'step': 17035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:13.887686', 'step': 17035, 'epoch': 3} {'type': 'loss', 'content': 0.0958457887172699, 'timestamp': '2025-09-30 22:33:13.913736', 'step': 17036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:13.947679', 'step': 17036, 'epoch': 3} {'type': 'loss', 'content': 0.0895412266254425, 'timestamp': '2025-09-30 22:33:13.959774', 'step': 17037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:13.996559', 'step': 17037, 'epoch': 3} {'type': 'loss', 'content': 0.08913907408714294, 'timestamp': '2025-09-30 22:33:14.010755', 'step': 17038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.047356', 'step': 17038, 'epoch': 3} {'type': 'loss', 'content': 0.035805873572826385, 'timestamp': '2025-09-30 22:33:14.051157', 'step': 17039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.085137', 'step': 17039, 'epoch': 3} {'type': 'loss', 'content': 0.0637938380241394, 'timestamp': '2025-09-30 22:33:14.121620', 'step': 17040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.156940', 'step': 17040, 'epoch': 3} {'type': 'loss', 'content': 0.05114629119634628, 'timestamp': '2025-09-30 22:33:14.164944', 'step': 17041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:14.201690', 'step': 17041, 'epoch': 3} {'type': 'loss', 'content': 0.06017017364501953, 'timestamp': '2025-09-30 22:33:14.212733', 'step': 17042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.244325', 'step': 17042, 'epoch': 3} {'type': 'loss', 'content': 0.08269418776035309, 'timestamp': '2025-09-30 22:33:14.263816', 'step': 17043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:14.297272', 'step': 17043, 'epoch': 3} {'type': 'loss', 'content': 0.047721996903419495, 'timestamp': '2025-09-30 22:33:14.325068', 'step': 17044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:14.361081', 'step': 17044, 'epoch': 3} {'type': 'loss', 'content': 0.07210531085729599, 'timestamp': '2025-09-30 22:33:14.363430', 'step': 17045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:14.400700', 'step': 17045, 'epoch': 3} {'type': 'loss', 'content': 0.09714056551456451, 'timestamp': '2025-09-30 22:33:14.405946', 'step': 17046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:14.438162', 'step': 17046, 'epoch': 3} {'type': 'loss', 'content': 0.0713760256767273, 'timestamp': '2025-09-30 22:33:14.443047', 'step': 17047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.478948', 'step': 17047, 'epoch': 3} {'type': 'loss', 'content': 0.19693642854690552, 'timestamp': '2025-09-30 22:33:14.520921', 'step': 17048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:14.564595', 'step': 17048, 'epoch': 3} {'type': 'loss', 'content': 0.07791014015674591, 'timestamp': '2025-09-30 22:33:14.581824', 'step': 17049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.624474', 'step': 17049, 'epoch': 3} {'type': 'loss', 'content': 0.06526065617799759, 'timestamp': '2025-09-30 22:33:14.633427', 'step': 17050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:14.675189', 'step': 17050, 'epoch': 3} {'type': 'loss', 'content': 0.09618604183197021, 'timestamp': '2025-09-30 22:33:14.688270', 'step': 17051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:14.718297', 'step': 17051, 'epoch': 3} {'type': 'loss', 'content': 0.15352767705917358, 'timestamp': '2025-09-30 22:33:14.746622', 'step': 17052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:14.788363', 'step': 17052, 'epoch': 3} {'type': 'loss', 'content': 0.049164265394210815, 'timestamp': '2025-09-30 22:33:14.791397', 'step': 17053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:14.824281', 'step': 17053, 'epoch': 3} {'type': 'loss', 'content': 0.05003845691680908, 'timestamp': '2025-09-30 22:33:14.830119', 'step': 17054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:14.877432', 'step': 17054, 'epoch': 3} {'type': 'loss', 'content': 0.17004777491092682, 'timestamp': '2025-09-30 22:33:14.880262', 'step': 17055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:14.913084', 'step': 17055, 'epoch': 3} {'type': 'loss', 'content': 0.01706375554203987, 'timestamp': '2025-09-30 22:33:14.941602', 'step': 17056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:14.978769', 'step': 17056, 'epoch': 3} {'type': 'loss', 'content': 0.10593549907207489, 'timestamp': '2025-09-30 22:33:14.992787', 'step': 17057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:15.030417', 'step': 17057, 'epoch': 3} {'type': 'loss', 'content': 0.13976450264453888, 'timestamp': '2025-09-30 22:33:15.033002', 'step': 17058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:15.076840', 'step': 17058, 'epoch': 3} {'type': 'loss', 'content': 0.07154805958271027, 'timestamp': '2025-09-30 22:33:15.080692', 'step': 17059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:15.116815', 'step': 17059, 'epoch': 3} {'type': 'loss', 'content': 0.0439135767519474, 'timestamp': '2025-09-30 22:33:15.157008', 'step': 17060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:15.204839', 'step': 17060, 'epoch': 3} {'type': 'loss', 'content': 0.08956392854452133, 'timestamp': '2025-09-30 22:33:15.210669', 'step': 17061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:15.244918', 'step': 17061, 'epoch': 3} {'type': 'loss', 'content': 0.06854478269815445, 'timestamp': '2025-09-30 22:33:15.254651', 'step': 17062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:15.292169', 'step': 17062, 'epoch': 3} {'type': 'loss', 'content': 0.06391346454620361, 'timestamp': '2025-09-30 22:33:15.302466', 'step': 17063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:15.341675', 'step': 17063, 'epoch': 3} {'type': 'loss', 'content': 0.08835899084806442, 'timestamp': '2025-09-30 22:33:15.386626', 'step': 17064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:15.419993', 'step': 17064, 'epoch': 3} {'type': 'loss', 'content': 0.06405669450759888, 'timestamp': '2025-09-30 22:33:15.433589', 'step': 17065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:15.466737', 'step': 17065, 'epoch': 3} {'type': 'loss', 'content': 0.05436746031045914, 'timestamp': '2025-09-30 22:33:15.476429', 'step': 17066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:15.542843', 'step': 17066, 'epoch': 3} {'type': 'loss', 'content': 0.11409228295087814, 'timestamp': '2025-09-30 22:33:15.547413', 'step': 17067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:15.579838', 'step': 17067, 'epoch': 3} {'type': 'loss', 'content': 0.06135886535048485, 'timestamp': '2025-09-30 22:33:15.604933', 'step': 17068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:15.638446', 'step': 17068, 'epoch': 3} {'type': 'loss', 'content': 0.06995872408151627, 'timestamp': '2025-09-30 22:33:15.647457', 'step': 17069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:15.679063', 'step': 17069, 'epoch': 3} {'type': 'loss', 'content': 0.07058338820934296, 'timestamp': '2025-09-30 22:33:15.686123', 'step': 17070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:15.729595', 'step': 17070, 'epoch': 3} {'type': 'loss', 'content': 0.10250487178564072, 'timestamp': '2025-09-30 22:33:15.734144', 'step': 17071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:15.767367', 'step': 17071, 'epoch': 3} {'type': 'loss', 'content': 0.08992521464824677, 'timestamp': '2025-09-30 22:33:15.792280', 'step': 17072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:15.833360', 'step': 17072, 'epoch': 3} {'type': 'loss', 'content': 0.03537001088261604, 'timestamp': '2025-09-30 22:33:15.836507', 'step': 17073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:15.868326', 'step': 17073, 'epoch': 3} {'type': 'loss', 'content': 0.05800404027104378, 'timestamp': '2025-09-30 22:33:15.877574', 'step': 17074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:15.910058', 'step': 17074, 'epoch': 3} {'type': 'loss', 'content': 0.10514562577009201, 'timestamp': '2025-09-30 22:33:15.912429', 'step': 17075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:15.950382', 'step': 17075, 'epoch': 3} {'type': 'loss', 'content': 0.08715946972370148, 'timestamp': '2025-09-30 22:33:15.975255', 'step': 17076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.008520', 'step': 17076, 'epoch': 3} {'type': 'loss', 'content': 0.05826541408896446, 'timestamp': '2025-09-30 22:33:16.012055', 'step': 17077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:16.046034', 'step': 17077, 'epoch': 3} {'type': 'loss', 'content': 0.02286316454410553, 'timestamp': '2025-09-30 22:33:16.049430', 'step': 17078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.082867', 'step': 17078, 'epoch': 3} {'type': 'loss', 'content': 0.09742755442857742, 'timestamp': '2025-09-30 22:33:16.086676', 'step': 17079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:16.125166', 'step': 17079, 'epoch': 3} {'type': 'loss', 'content': 0.09095127135515213, 'timestamp': '2025-09-30 22:33:16.150373', 'step': 17080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.197412', 'step': 17080, 'epoch': 3} {'type': 'loss', 'content': 0.06518170237541199, 'timestamp': '2025-09-30 22:33:16.200887', 'step': 17081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:16.239280', 'step': 17081, 'epoch': 3} {'type': 'loss', 'content': 0.14351634681224823, 'timestamp': '2025-09-30 22:33:16.242849', 'step': 17082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:16.286040', 'step': 17082, 'epoch': 3} {'type': 'loss', 'content': 0.07462460547685623, 'timestamp': '2025-09-30 22:33:16.295731', 'step': 17083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.328588', 'step': 17083, 'epoch': 3} {'type': 'loss', 'content': 0.12159253656864166, 'timestamp': '2025-09-30 22:33:16.355564', 'step': 17084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.387339', 'step': 17084, 'epoch': 3} {'type': 'loss', 'content': 0.1118764579296112, 'timestamp': '2025-09-30 22:33:16.402802', 'step': 17085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:16.456412', 'step': 17085, 'epoch': 3} {'type': 'loss', 'content': 0.033415619283914566, 'timestamp': '2025-09-30 22:33:16.462753', 'step': 17086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:16.498995', 'step': 17086, 'epoch': 3} {'type': 'loss', 'content': 0.09589529037475586, 'timestamp': '2025-09-30 22:33:16.502500', 'step': 17087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.535356', 'step': 17087, 'epoch': 3} {'type': 'loss', 'content': 0.1287367343902588, 'timestamp': '2025-09-30 22:33:16.563870', 'step': 17088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.596535', 'step': 17088, 'epoch': 3} {'type': 'loss', 'content': 0.12584945559501648, 'timestamp': '2025-09-30 22:33:16.600489', 'step': 17089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:16.640175', 'step': 17089, 'epoch': 3} {'type': 'loss', 'content': 0.08541779965162277, 'timestamp': '2025-09-30 22:33:16.643751', 'step': 17090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:16.680269', 'step': 17090, 'epoch': 3} {'type': 'loss', 'content': 0.015284637920558453, 'timestamp': '2025-09-30 22:33:16.694992', 'step': 17091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:16.735286', 'step': 17091, 'epoch': 3} {'type': 'loss', 'content': 0.14226901531219482, 'timestamp': '2025-09-30 22:33:16.760915', 'step': 17092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:16.791988', 'step': 17092, 'epoch': 3} {'type': 'loss', 'content': 0.14985768496990204, 'timestamp': '2025-09-30 22:33:16.807239', 'step': 17093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:16.839192', 'step': 17093, 'epoch': 3} {'type': 'loss', 'content': 0.10436620563268661, 'timestamp': '2025-09-30 22:33:16.844818', 'step': 17094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:16.877497', 'step': 17094, 'epoch': 3} {'type': 'loss', 'content': 0.11641520261764526, 'timestamp': '2025-09-30 22:33:16.885526', 'step': 17095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:16.927935', 'step': 17095, 'epoch': 3} {'type': 'loss', 'content': 0.10331963747739792, 'timestamp': '2025-09-30 22:33:16.953061', 'step': 17096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:16.985242', 'step': 17096, 'epoch': 3} {'type': 'loss', 'content': 0.08554086834192276, 'timestamp': '2025-09-30 22:33:16.989578', 'step': 17097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.021706', 'step': 17097, 'epoch': 3} {'type': 'loss', 'content': 0.06518270075321198, 'timestamp': '2025-09-30 22:33:17.027228', 'step': 17098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.065773', 'step': 17098, 'epoch': 3} {'type': 'loss', 'content': 0.057832539081573486, 'timestamp': '2025-09-30 22:33:17.070088', 'step': 17099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:17.102288', 'step': 17099, 'epoch': 3} {'type': 'loss', 'content': 0.10317280888557434, 'timestamp': '2025-09-30 22:33:17.126723', 'step': 17100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:17.158869', 'step': 17100, 'epoch': 3} {'type': 'loss', 'content': 0.04550182446837425, 'timestamp': '2025-09-30 22:33:17.163242', 'step': 17101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.194116', 'step': 17101, 'epoch': 3} {'type': 'loss', 'content': 0.16570504009723663, 'timestamp': '2025-09-30 22:33:17.204548', 'step': 17102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.236449', 'step': 17102, 'epoch': 3} {'type': 'loss', 'content': 0.030253084376454353, 'timestamp': '2025-09-30 22:33:17.240549', 'step': 17103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.273692', 'step': 17103, 'epoch': 3} {'type': 'loss', 'content': 0.033391073346138, 'timestamp': '2025-09-30 22:33:17.307934', 'step': 17104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.346305', 'step': 17104, 'epoch': 3} {'type': 'loss', 'content': 0.14812615513801575, 'timestamp': '2025-09-30 22:33:17.351998', 'step': 17105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:17.386323', 'step': 17105, 'epoch': 3} {'type': 'loss', 'content': 0.08721239864826202, 'timestamp': '2025-09-30 22:33:17.391239', 'step': 17106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.425300', 'step': 17106, 'epoch': 3} {'type': 'loss', 'content': 0.049379248172044754, 'timestamp': '2025-09-30 22:33:17.430466', 'step': 17107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:17.475372', 'step': 17107, 'epoch': 3} {'type': 'loss', 'content': 0.17991718649864197, 'timestamp': '2025-09-30 22:33:17.500894', 'step': 17108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:17.533910', 'step': 17108, 'epoch': 3} {'type': 'loss', 'content': 0.03755475953221321, 'timestamp': '2025-09-30 22:33:17.539368', 'step': 17109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.575564', 'step': 17109, 'epoch': 3} {'type': 'loss', 'content': 0.070880226790905, 'timestamp': '2025-09-30 22:33:17.587229', 'step': 17110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:17.619866', 'step': 17110, 'epoch': 3} {'type': 'loss', 'content': 0.04585753008723259, 'timestamp': '2025-09-30 22:33:17.624099', 'step': 17111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:17.665412', 'step': 17111, 'epoch': 3} {'type': 'loss', 'content': 0.06080057844519615, 'timestamp': '2025-09-30 22:33:17.696461', 'step': 17112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:17.738628', 'step': 17112, 'epoch': 3} {'type': 'loss', 'content': 0.07512212544679642, 'timestamp': '2025-09-30 22:33:17.743713', 'step': 17113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:17.786181', 'step': 17113, 'epoch': 3} {'type': 'loss', 'content': 0.06929316371679306, 'timestamp': '2025-09-30 22:33:17.790750', 'step': 17114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:17.831426', 'step': 17114, 'epoch': 3} {'type': 'loss', 'content': 0.1276673525571823, 'timestamp': '2025-09-30 22:33:17.835607', 'step': 17115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:17.868614', 'step': 17115, 'epoch': 3} {'type': 'loss', 'content': 0.0877043828368187, 'timestamp': '2025-09-30 22:33:17.901391', 'step': 17116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:17.932129', 'step': 17116, 'epoch': 3} {'type': 'loss', 'content': 0.04138309508562088, 'timestamp': '2025-09-30 22:33:17.936392', 'step': 17117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:17.967656', 'step': 17117, 'epoch': 3} {'type': 'loss', 'content': 0.05531497299671173, 'timestamp': '2025-09-30 22:33:17.985044', 'step': 17118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.016910', 'step': 17118, 'epoch': 3} {'type': 'loss', 'content': 0.09856611490249634, 'timestamp': '2025-09-30 22:33:18.020609', 'step': 17119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:18.054170', 'step': 17119, 'epoch': 3} {'type': 'loss', 'content': 0.05760391056537628, 'timestamp': '2025-09-30 22:33:18.090514', 'step': 17120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:18.121677', 'step': 17120, 'epoch': 3} {'type': 'loss', 'content': 0.03654063493013382, 'timestamp': '2025-09-30 22:33:18.127014', 'step': 17121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:33:18.171305', 'step': 17121, 'epoch': 3} {'type': 'loss', 'content': 0.08866345137357712, 'timestamp': '2025-09-30 22:33:18.176683', 'step': 17122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:18.227072', 'step': 17122, 'epoch': 3} {'type': 'loss', 'content': 0.025251878425478935, 'timestamp': '2025-09-30 22:33:18.233572', 'step': 17123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.268290', 'step': 17123, 'epoch': 3} {'type': 'loss', 'content': 0.04555748030543327, 'timestamp': '2025-09-30 22:33:18.304294', 'step': 17124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.351565', 'step': 17124, 'epoch': 3} {'type': 'loss', 'content': 0.058926913887262344, 'timestamp': '2025-09-30 22:33:18.358899', 'step': 17125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:18.392420', 'step': 17125, 'epoch': 3} {'type': 'loss', 'content': 0.12339547276496887, 'timestamp': '2025-09-30 22:33:18.409545', 'step': 17126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:18.442172', 'step': 17126, 'epoch': 3} {'type': 'loss', 'content': 0.07612068206071854, 'timestamp': '2025-09-30 22:33:18.452833', 'step': 17127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:18.494330', 'step': 17127, 'epoch': 3} {'type': 'loss', 'content': 0.08290525525808334, 'timestamp': '2025-09-30 22:33:18.528155', 'step': 17128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:18.564998', 'step': 17128, 'epoch': 3} {'type': 'loss', 'content': 0.06634154915809631, 'timestamp': '2025-09-30 22:33:18.577260', 'step': 17129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.615275', 'step': 17129, 'epoch': 3} {'type': 'loss', 'content': 0.11614520847797394, 'timestamp': '2025-09-30 22:33:18.618683', 'step': 17130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.650179', 'step': 17130, 'epoch': 3} {'type': 'loss', 'content': 0.08881739526987076, 'timestamp': '2025-09-30 22:33:18.665950', 'step': 17131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.699498', 'step': 17131, 'epoch': 3} {'type': 'loss', 'content': 0.10567767918109894, 'timestamp': '2025-09-30 22:33:18.725374', 'step': 17132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:18.757321', 'step': 17132, 'epoch': 3} {'type': 'loss', 'content': 0.03536803275346756, 'timestamp': '2025-09-30 22:33:18.760497', 'step': 17133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:18.792801', 'step': 17133, 'epoch': 3} {'type': 'loss', 'content': 0.06335215270519257, 'timestamp': '2025-09-30 22:33:18.804991', 'step': 17134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:18.839850', 'step': 17134, 'epoch': 3} {'type': 'loss', 'content': 0.10205788910388947, 'timestamp': '2025-09-30 22:33:18.857617', 'step': 17135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:18.890185', 'step': 17135, 'epoch': 3} {'type': 'loss', 'content': 0.12449029088020325, 'timestamp': '2025-09-30 22:33:18.921878', 'step': 17136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:18.956320', 'step': 17136, 'epoch': 3} {'type': 'loss', 'content': 0.013146847486495972, 'timestamp': '2025-09-30 22:33:18.961466', 'step': 17137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:18.995199', 'step': 17137, 'epoch': 3} {'type': 'loss', 'content': 0.11835415661334991, 'timestamp': '2025-09-30 22:33:19.000948', 'step': 17138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:19.035171', 'step': 17138, 'epoch': 3} {'type': 'loss', 'content': 0.06797605752944946, 'timestamp': '2025-09-30 22:33:19.039777', 'step': 17139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:33:19.073065', 'step': 17139, 'epoch': 3} {'type': 'loss', 'content': 0.04657483845949173, 'timestamp': '2025-09-30 22:33:19.113399', 'step': 17140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:19.146101', 'step': 17140, 'epoch': 3} {'type': 'loss', 'content': 0.027835264801979065, 'timestamp': '2025-09-30 22:33:19.159818', 'step': 17141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.201051', 'step': 17141, 'epoch': 3} {'type': 'loss', 'content': 0.052228689193725586, 'timestamp': '2025-09-30 22:33:19.206933', 'step': 17142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.239523', 'step': 17142, 'epoch': 3} {'type': 'loss', 'content': 0.1327563375234604, 'timestamp': '2025-09-30 22:33:19.243849', 'step': 17143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:19.276519', 'step': 17143, 'epoch': 3} {'type': 'loss', 'content': 0.10505326092243195, 'timestamp': '2025-09-30 22:33:19.301656', 'step': 17144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:19.335074', 'step': 17144, 'epoch': 3} {'type': 'loss', 'content': 0.06057974323630333, 'timestamp': '2025-09-30 22:33:19.347509', 'step': 17145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.380631', 'step': 17145, 'epoch': 3} {'type': 'loss', 'content': 0.09496747702360153, 'timestamp': '2025-09-30 22:33:19.384489', 'step': 17146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.416629', 'step': 17146, 'epoch': 3} {'type': 'loss', 'content': 0.0787946954369545, 'timestamp': '2025-09-30 22:33:19.429483', 'step': 17147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:19.462749', 'step': 17147, 'epoch': 3} {'type': 'loss', 'content': 0.08669871091842651, 'timestamp': '2025-09-30 22:33:19.498776', 'step': 17148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:19.547865', 'step': 17148, 'epoch': 3} {'type': 'loss', 'content': 0.0658816546201706, 'timestamp': '2025-09-30 22:33:19.561213', 'step': 17149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:19.593584', 'step': 17149, 'epoch': 3} {'type': 'loss', 'content': 0.09535007923841476, 'timestamp': '2025-09-30 22:33:19.597683', 'step': 17150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.644008', 'step': 17150, 'epoch': 3} {'type': 'loss', 'content': 0.059285737574100494, 'timestamp': '2025-09-30 22:33:19.662041', 'step': 17151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:19.703163', 'step': 17151, 'epoch': 3} {'type': 'loss', 'content': 0.08529586344957352, 'timestamp': '2025-09-30 22:33:19.737247', 'step': 17152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.770432', 'step': 17152, 'epoch': 3} {'type': 'loss', 'content': 0.040046658366918564, 'timestamp': '2025-09-30 22:33:19.775446', 'step': 17153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:19.816347', 'step': 17153, 'epoch': 3} {'type': 'loss', 'content': 0.08572148531675339, 'timestamp': '2025-09-30 22:33:19.826352', 'step': 17154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:19.858961', 'step': 17154, 'epoch': 3} {'type': 'loss', 'content': 0.0807262510061264, 'timestamp': '2025-09-30 22:33:19.873392', 'step': 17155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:19.915820', 'step': 17155, 'epoch': 3} {'type': 'loss', 'content': 0.05750928446650505, 'timestamp': '2025-09-30 22:33:19.945827', 'step': 17156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:19.980264', 'step': 17156, 'epoch': 3} {'type': 'loss', 'content': 0.14183147251605988, 'timestamp': '2025-09-30 22:33:19.986392', 'step': 17157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:20.019830', 'step': 17157, 'epoch': 3} {'type': 'loss', 'content': 0.06442348659038544, 'timestamp': '2025-09-30 22:33:20.028814', 'step': 17158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:20.089754', 'step': 17158, 'epoch': 3} {'type': 'loss', 'content': 0.05881734937429428, 'timestamp': '2025-09-30 22:33:20.096186', 'step': 17159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:20.132348', 'step': 17159, 'epoch': 3} {'type': 'loss', 'content': 0.021442299708724022, 'timestamp': '2025-09-30 22:33:20.166415', 'step': 17160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:20.205061', 'step': 17160, 'epoch': 3} {'type': 'loss', 'content': 0.10551789402961731, 'timestamp': '2025-09-30 22:33:20.210040', 'step': 17161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:20.250671', 'step': 17161, 'epoch': 3} {'type': 'loss', 'content': 0.06557445228099823, 'timestamp': '2025-09-30 22:33:20.255373', 'step': 17162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:20.290747', 'step': 17162, 'epoch': 3} {'type': 'loss', 'content': 0.12817253172397614, 'timestamp': '2025-09-30 22:33:20.295788', 'step': 17163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:20.328345', 'step': 17163, 'epoch': 3} {'type': 'loss', 'content': 0.022293787449598312, 'timestamp': '2025-09-30 22:33:20.367709', 'step': 17164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:20.412408', 'step': 17164, 'epoch': 3} {'type': 'loss', 'content': 0.14752298593521118, 'timestamp': '2025-09-30 22:33:20.419065', 'step': 17165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:20.452042', 'step': 17165, 'epoch': 3} {'type': 'loss', 'content': 0.07668550312519073, 'timestamp': '2025-09-30 22:33:20.469392', 'step': 17166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:20.508572', 'step': 17166, 'epoch': 3} {'type': 'loss', 'content': 0.05505836382508278, 'timestamp': '2025-09-30 22:33:20.517451', 'step': 17167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:20.566190', 'step': 17167, 'epoch': 3} {'type': 'loss', 'content': 0.060988765209913254, 'timestamp': '2025-09-30 22:33:20.602273', 'step': 17168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:20.635139', 'step': 17168, 'epoch': 3} {'type': 'loss', 'content': 0.045720040798187256, 'timestamp': '2025-09-30 22:33:20.639721', 'step': 17169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:20.678812', 'step': 17169, 'epoch': 3} {'type': 'loss', 'content': 0.09888216853141785, 'timestamp': '2025-09-30 22:33:20.683850', 'step': 17170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:20.721947', 'step': 17170, 'epoch': 3} {'type': 'loss', 'content': 0.04140189662575722, 'timestamp': '2025-09-30 22:33:20.731009', 'step': 17171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:20.764572', 'step': 17171, 'epoch': 3} {'type': 'loss', 'content': 0.0788145661354065, 'timestamp': '2025-09-30 22:33:20.792298', 'step': 17172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:20.836625', 'step': 17172, 'epoch': 3} {'type': 'loss', 'content': 0.028564635664224625, 'timestamp': '2025-09-30 22:33:20.856710', 'step': 17173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:20.890667', 'step': 17173, 'epoch': 3} {'type': 'loss', 'content': 0.06200223043560982, 'timestamp': '2025-09-30 22:33:20.906411', 'step': 17174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:33:20.942610', 'step': 17174, 'epoch': 3} {'type': 'loss', 'content': 0.08142632246017456, 'timestamp': '2025-09-30 22:33:20.946912', 'step': 17175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:20.981336', 'step': 17175, 'epoch': 3} {'type': 'loss', 'content': 0.061572447419166565, 'timestamp': '2025-09-30 22:33:21.016591', 'step': 17176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.047746', 'step': 17176, 'epoch': 3} {'type': 'loss', 'content': 0.11881009489297867, 'timestamp': '2025-09-30 22:33:21.064224', 'step': 17177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:21.106868', 'step': 17177, 'epoch': 3} {'type': 'loss', 'content': 0.01368984580039978, 'timestamp': '2025-09-30 22:33:21.112123', 'step': 17178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:21.156714', 'step': 17178, 'epoch': 3} {'type': 'loss', 'content': 0.1091725155711174, 'timestamp': '2025-09-30 22:33:21.163401', 'step': 17179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:21.199100', 'step': 17179, 'epoch': 3} {'type': 'loss', 'content': 0.023597735911607742, 'timestamp': '2025-09-30 22:33:21.228811', 'step': 17180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.262462', 'step': 17180, 'epoch': 3} {'type': 'loss', 'content': 0.11512391269207001, 'timestamp': '2025-09-30 22:33:21.278698', 'step': 17181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:21.324144', 'step': 17181, 'epoch': 3} {'type': 'loss', 'content': 0.08269393444061279, 'timestamp': '2025-09-30 22:33:21.338596', 'step': 17182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:21.376484', 'step': 17182, 'epoch': 3} {'type': 'loss', 'content': 0.058312591165304184, 'timestamp': '2025-09-30 22:33:21.381337', 'step': 17183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:21.426352', 'step': 17183, 'epoch': 3} {'type': 'loss', 'content': 0.065697580575943, 'timestamp': '2025-09-30 22:33:21.455326', 'step': 17184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:21.487416', 'step': 17184, 'epoch': 3} {'type': 'loss', 'content': 0.0976296216249466, 'timestamp': '2025-09-30 22:33:21.493019', 'step': 17185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.525477', 'step': 17185, 'epoch': 3} {'type': 'loss', 'content': 0.08589453995227814, 'timestamp': '2025-09-30 22:33:21.529485', 'step': 17186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:21.562057', 'step': 17186, 'epoch': 3} {'type': 'loss', 'content': 0.06175651401281357, 'timestamp': '2025-09-30 22:33:21.584121', 'step': 17187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.617874', 'step': 17187, 'epoch': 3} {'type': 'loss', 'content': 0.017482928931713104, 'timestamp': '2025-09-30 22:33:21.646717', 'step': 17188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.680936', 'step': 17188, 'epoch': 3} {'type': 'loss', 'content': 0.11603176593780518, 'timestamp': '2025-09-30 22:33:21.686593', 'step': 17189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:21.718268', 'step': 17189, 'epoch': 3} {'type': 'loss', 'content': 0.04484321177005768, 'timestamp': '2025-09-30 22:33:21.733979', 'step': 17190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.766711', 'step': 17190, 'epoch': 3} {'type': 'loss', 'content': 0.08886725455522537, 'timestamp': '2025-09-30 22:33:21.772116', 'step': 17191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.804450', 'step': 17191, 'epoch': 3} {'type': 'loss', 'content': 0.07649791985750198, 'timestamp': '2025-09-30 22:33:21.831789', 'step': 17192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:21.863652', 'step': 17192, 'epoch': 3} {'type': 'loss', 'content': 0.05032547563314438, 'timestamp': '2025-09-30 22:33:21.867661', 'step': 17193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:21.899089', 'step': 17193, 'epoch': 3} {'type': 'loss', 'content': 0.05358520895242691, 'timestamp': '2025-09-30 22:33:21.906626', 'step': 17194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:21.944398', 'step': 17194, 'epoch': 3} {'type': 'loss', 'content': 0.03667939826846123, 'timestamp': '2025-09-30 22:33:21.957022', 'step': 17195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:21.989158', 'step': 17195, 'epoch': 3} {'type': 'loss', 'content': 0.13820934295654297, 'timestamp': '2025-09-30 22:33:22.015631', 'step': 17196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.047997', 'step': 17196, 'epoch': 3} {'type': 'loss', 'content': 0.05464210733771324, 'timestamp': '2025-09-30 22:33:22.059612', 'step': 17197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.097127', 'step': 17197, 'epoch': 3} {'type': 'loss', 'content': 0.11645742505788803, 'timestamp': '2025-09-30 22:33:22.100110', 'step': 17198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.147109', 'step': 17198, 'epoch': 3} {'type': 'loss', 'content': 0.08103340864181519, 'timestamp': '2025-09-30 22:33:22.150528', 'step': 17199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:22.188625', 'step': 17199, 'epoch': 3} {'type': 'loss', 'content': 0.1113848015666008, 'timestamp': '2025-09-30 22:33:22.226974', 'step': 17200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.258587', 'step': 17200, 'epoch': 3} {'type': 'loss', 'content': 0.08504872769117355, 'timestamp': '2025-09-30 22:33:22.261848', 'step': 17201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.295481', 'step': 17201, 'epoch': 3} {'type': 'loss', 'content': 0.06173957884311676, 'timestamp': '2025-09-30 22:33:22.301513', 'step': 17202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:22.333985', 'step': 17202, 'epoch': 3} {'type': 'loss', 'content': 0.061514198780059814, 'timestamp': '2025-09-30 22:33:22.338584', 'step': 17203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:22.387907', 'step': 17203, 'epoch': 3} {'type': 'loss', 'content': 0.07919825613498688, 'timestamp': '2025-09-30 22:33:22.413575', 'step': 17204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.452013', 'step': 17204, 'epoch': 3} {'type': 'loss', 'content': 0.06540361791849136, 'timestamp': '2025-09-30 22:33:22.457025', 'step': 17205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:22.495721', 'step': 17205, 'epoch': 3} {'type': 'loss', 'content': 0.02798730507493019, 'timestamp': '2025-09-30 22:33:22.498966', 'step': 17206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:22.532743', 'step': 17206, 'epoch': 3} {'type': 'loss', 'content': 0.04156218469142914, 'timestamp': '2025-09-30 22:33:22.536624', 'step': 17207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:22.569348', 'step': 17207, 'epoch': 3} {'type': 'loss', 'content': 0.10400338470935822, 'timestamp': '2025-09-30 22:33:22.605216', 'step': 17208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:22.643743', 'step': 17208, 'epoch': 3} {'type': 'loss', 'content': 0.10224338620901108, 'timestamp': '2025-09-30 22:33:22.648306', 'step': 17209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:22.681526', 'step': 17209, 'epoch': 3} {'type': 'loss', 'content': 0.07521769404411316, 'timestamp': '2025-09-30 22:33:22.696666', 'step': 17210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.737165', 'step': 17210, 'epoch': 3} {'type': 'loss', 'content': 0.17877738177776337, 'timestamp': '2025-09-30 22:33:22.741347', 'step': 17211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:22.774130', 'step': 17211, 'epoch': 3} {'type': 'loss', 'content': 0.0904395654797554, 'timestamp': '2025-09-30 22:33:22.800628', 'step': 17212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:22.849006', 'step': 17212, 'epoch': 3} {'type': 'loss', 'content': 0.03734472021460533, 'timestamp': '2025-09-30 22:33:22.855512', 'step': 17213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:22.893200', 'step': 17213, 'epoch': 3} {'type': 'loss', 'content': 0.08390992879867554, 'timestamp': '2025-09-30 22:33:22.895893', 'step': 17214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:22.928124', 'step': 17214, 'epoch': 3} {'type': 'loss', 'content': 0.10011740028858185, 'timestamp': '2025-09-30 22:33:22.932224', 'step': 17215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:22.966573', 'step': 17215, 'epoch': 3} {'type': 'loss', 'content': 0.009799566119909286, 'timestamp': '2025-09-30 22:33:23.001698', 'step': 17216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:23.036063', 'step': 17216, 'epoch': 3} {'type': 'loss', 'content': 0.08578548580408096, 'timestamp': '2025-09-30 22:33:23.040125', 'step': 17217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:23.071258', 'step': 17217, 'epoch': 3} {'type': 'loss', 'content': 0.026302162557840347, 'timestamp': '2025-09-30 22:33:23.075756', 'step': 17218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:23.108775', 'step': 17218, 'epoch': 3} {'type': 'loss', 'content': 0.03251289576292038, 'timestamp': '2025-09-30 22:33:23.117561', 'step': 17219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:23.153276', 'step': 17219, 'epoch': 3} {'type': 'loss', 'content': 0.06899010390043259, 'timestamp': '2025-09-30 22:33:23.178561', 'step': 17220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:23.228388', 'step': 17220, 'epoch': 3} {'type': 'loss', 'content': 0.07463794946670532, 'timestamp': '2025-09-30 22:33:23.234433', 'step': 17221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:23.267723', 'step': 17221, 'epoch': 3} {'type': 'loss', 'content': 0.03247228264808655, 'timestamp': '2025-09-30 22:33:23.285237', 'step': 17222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:23.320151', 'step': 17222, 'epoch': 3} {'type': 'loss', 'content': 0.11525466293096542, 'timestamp': '2025-09-30 22:33:23.335269', 'step': 17223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:23.376670', 'step': 17223, 'epoch': 3} {'type': 'loss', 'content': 0.10116453468799591, 'timestamp': '2025-09-30 22:33:23.403692', 'step': 17224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:23.439631', 'step': 17224, 'epoch': 3} {'type': 'loss', 'content': 0.09634487330913544, 'timestamp': '2025-09-30 22:33:23.445296', 'step': 17225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:23.480886', 'step': 17225, 'epoch': 3} {'type': 'loss', 'content': 0.06460907310247421, 'timestamp': '2025-09-30 22:33:23.494803', 'step': 17226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:23.527168', 'step': 17226, 'epoch': 3} {'type': 'loss', 'content': 0.026864588260650635, 'timestamp': '2025-09-30 22:33:23.534986', 'step': 17227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:23.585911', 'step': 17227, 'epoch': 3} {'type': 'loss', 'content': 0.11066995561122894, 'timestamp': '2025-09-30 22:33:23.611282', 'step': 17228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:23.644516', 'step': 17228, 'epoch': 3} {'type': 'loss', 'content': 0.06487594544887543, 'timestamp': '2025-09-30 22:33:23.649301', 'step': 17229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:23.687592', 'step': 17229, 'epoch': 3} {'type': 'loss', 'content': 0.1282140165567398, 'timestamp': '2025-09-30 22:33:23.704220', 'step': 17230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:23.745739', 'step': 17230, 'epoch': 3} {'type': 'loss', 'content': 0.02207106724381447, 'timestamp': '2025-09-30 22:33:23.749408', 'step': 17231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:23.782164', 'step': 17231, 'epoch': 3} {'type': 'loss', 'content': 0.012879028916358948, 'timestamp': '2025-09-30 22:33:23.808568', 'step': 17232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:23.843287', 'step': 17232, 'epoch': 3} {'type': 'loss', 'content': 0.04846011474728584, 'timestamp': '2025-09-30 22:33:23.847506', 'step': 17233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:23.884431', 'step': 17233, 'epoch': 3} {'type': 'loss', 'content': 0.07193496823310852, 'timestamp': '2025-09-30 22:33:23.896908', 'step': 17234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:23.929512', 'step': 17234, 'epoch': 3} {'type': 'loss', 'content': 0.05510259047150612, 'timestamp': '2025-09-30 22:33:23.942730', 'step': 17235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:23.976277', 'step': 17235, 'epoch': 3} {'type': 'loss', 'content': 0.026944784447550774, 'timestamp': '2025-09-30 22:33:24.011160', 'step': 17236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:24.043353', 'step': 17236, 'epoch': 3} {'type': 'loss', 'content': 0.043452758342027664, 'timestamp': '2025-09-30 22:33:24.047432', 'step': 17237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:24.081005', 'step': 17237, 'epoch': 3} {'type': 'loss', 'content': 0.043523158878088, 'timestamp': '2025-09-30 22:33:24.085345', 'step': 17238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:24.117750', 'step': 17238, 'epoch': 3} {'type': 'loss', 'content': 0.05744705721735954, 'timestamp': '2025-09-30 22:33:24.120661', 'step': 17239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:33:24.152645', 'step': 17239, 'epoch': 3} {'type': 'loss', 'content': 0.006636560428887606, 'timestamp': '2025-09-30 22:33:24.178426', 'step': 17240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:24.216575', 'step': 17240, 'epoch': 3} {'type': 'loss', 'content': 0.05863921716809273, 'timestamp': '2025-09-30 22:33:24.223948', 'step': 17241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:24.260332', 'step': 17241, 'epoch': 3} {'type': 'loss', 'content': 0.02992074005305767, 'timestamp': '2025-09-30 22:33:24.264655', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:33:32.300048', 'step': 17242, 'epoch': 3} {'type': 'pplx', 'content': 10566.288979018413, 'timestamp': '2025-09-30 22:33:32.304465', 'step': 17242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:32.335396', 'step': 17242, 'epoch': 3} {'type': 'loss', 'content': 0.0651136115193367, 'timestamp': '2025-09-30 22:33:32.338558', 'step': 17243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:32.377547', 'step': 17243, 'epoch': 3} {'type': 'loss', 'content': 0.10936478525400162, 'timestamp': '2025-09-30 22:33:32.402073', 'step': 17244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:32.435405', 'step': 17244, 'epoch': 3} {'type': 'loss', 'content': 0.12977318465709686, 'timestamp': '2025-09-30 22:33:32.440162', 'step': 17245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:32.473293', 'step': 17245, 'epoch': 3} {'type': 'loss', 'content': 0.028592495247721672, 'timestamp': '2025-09-30 22:33:32.477131', 'step': 17246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:32.509131', 'step': 17246, 'epoch': 3} {'type': 'loss', 'content': 0.07474572211503983, 'timestamp': '2025-09-30 22:33:32.513835', 'step': 17247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:32.549468', 'step': 17247, 'epoch': 3} {'type': 'loss', 'content': 0.02991264872252941, 'timestamp': '2025-09-30 22:33:32.586894', 'step': 17248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:32.622950', 'step': 17248, 'epoch': 3} {'type': 'loss', 'content': 0.10193006694316864, 'timestamp': '2025-09-30 22:33:32.626466', 'step': 17249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:32.664307', 'step': 17249, 'epoch': 3} {'type': 'loss', 'content': 0.0884491428732872, 'timestamp': '2025-09-30 22:33:32.686786', 'step': 17250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:32.731561', 'step': 17250, 'epoch': 3} {'type': 'loss', 'content': 0.056379303336143494, 'timestamp': '2025-09-30 22:33:32.735074', 'step': 17251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:32.766656', 'step': 17251, 'epoch': 3} {'type': 'loss', 'content': 0.18360105156898499, 'timestamp': '2025-09-30 22:33:32.791555', 'step': 17252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:32.835697', 'step': 17252, 'epoch': 3} {'type': 'loss', 'content': 0.06255722790956497, 'timestamp': '2025-09-30 22:33:32.857876', 'step': 17253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:32.891583', 'step': 17253, 'epoch': 3} {'type': 'loss', 'content': 0.053152896463871, 'timestamp': '2025-09-30 22:33:32.926012', 'step': 17254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:32.995993', 'step': 17254, 'epoch': 3} {'type': 'loss', 'content': 0.08343282341957092, 'timestamp': '2025-09-30 22:33:33.001772', 'step': 17255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:33.049880', 'step': 17255, 'epoch': 3} {'type': 'loss', 'content': 0.05824873968958855, 'timestamp': '2025-09-30 22:33:33.076883', 'step': 17256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:33.127070', 'step': 17256, 'epoch': 3} {'type': 'loss', 'content': 0.04446379095315933, 'timestamp': '2025-09-30 22:33:33.130193', 'step': 17257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:33.168017', 'step': 17257, 'epoch': 3} {'type': 'loss', 'content': 0.018177399411797523, 'timestamp': '2025-09-30 22:33:33.184633', 'step': 17258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:33.217176', 'step': 17258, 'epoch': 3} {'type': 'loss', 'content': 0.0390760712325573, 'timestamp': '2025-09-30 22:33:33.221520', 'step': 17259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:33.257908', 'step': 17259, 'epoch': 3} {'type': 'loss', 'content': 0.08557911962270737, 'timestamp': '2025-09-30 22:33:33.295377', 'step': 17260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:33.339504', 'step': 17260, 'epoch': 3} {'type': 'loss', 'content': 0.049519021064043045, 'timestamp': '2025-09-30 22:33:33.352046', 'step': 17261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:33.389759', 'step': 17261, 'epoch': 3} {'type': 'loss', 'content': 0.09113705903291702, 'timestamp': '2025-09-30 22:33:33.404372', 'step': 17262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:33.441876', 'step': 17262, 'epoch': 3} {'type': 'loss', 'content': 0.08127936720848083, 'timestamp': '2025-09-30 22:33:33.447761', 'step': 17263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:33.491060', 'step': 17263, 'epoch': 3} {'type': 'loss', 'content': 0.047452449798583984, 'timestamp': '2025-09-30 22:33:33.515306', 'step': 17264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:33.549313', 'step': 17264, 'epoch': 3} {'type': 'loss', 'content': 0.09115641564130783, 'timestamp': '2025-09-30 22:33:33.554985', 'step': 17265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:33.585818', 'step': 17265, 'epoch': 3} {'type': 'loss', 'content': 0.09615159779787064, 'timestamp': '2025-09-30 22:33:33.589135', 'step': 17266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:33.622070', 'step': 17266, 'epoch': 3} {'type': 'loss', 'content': 0.01391026470810175, 'timestamp': '2025-09-30 22:33:33.625482', 'step': 17267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:33.662796', 'step': 17267, 'epoch': 3} {'type': 'loss', 'content': 0.058319538831710815, 'timestamp': '2025-09-30 22:33:33.691384', 'step': 17268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:33.729523', 'step': 17268, 'epoch': 3} {'type': 'loss', 'content': 0.12230958789587021, 'timestamp': '2025-09-30 22:33:33.735952', 'step': 17269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:33.770981', 'step': 17269, 'epoch': 3} {'type': 'loss', 'content': 0.052169594913721085, 'timestamp': '2025-09-30 22:33:33.775653', 'step': 17270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:33.821647', 'step': 17270, 'epoch': 3} {'type': 'loss', 'content': 0.07708173990249634, 'timestamp': '2025-09-30 22:33:33.833152', 'step': 17271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:33.866278', 'step': 17271, 'epoch': 3} {'type': 'loss', 'content': 0.04357427358627319, 'timestamp': '2025-09-30 22:33:33.893874', 'step': 17272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:33.939567', 'step': 17272, 'epoch': 3} {'type': 'loss', 'content': 0.03146981820464134, 'timestamp': '2025-09-30 22:33:33.942984', 'step': 17273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:33.988421', 'step': 17273, 'epoch': 3} {'type': 'loss', 'content': 0.01173507422208786, 'timestamp': '2025-09-30 22:33:33.992435', 'step': 17274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.027903', 'step': 17274, 'epoch': 3} {'type': 'loss', 'content': 0.03043413534760475, 'timestamp': '2025-09-30 22:33:34.041404', 'step': 17275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.086726', 'step': 17275, 'epoch': 3} {'type': 'loss', 'content': 0.023866571485996246, 'timestamp': '2025-09-30 22:33:34.111131', 'step': 17276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.145365', 'step': 17276, 'epoch': 3} {'type': 'loss', 'content': 0.0755804032087326, 'timestamp': '2025-09-30 22:33:34.149040', 'step': 17277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:34.181107', 'step': 17277, 'epoch': 3} {'type': 'loss', 'content': 0.06308454275131226, 'timestamp': '2025-09-30 22:33:34.185445', 'step': 17278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:34.217306', 'step': 17278, 'epoch': 3} {'type': 'loss', 'content': 0.07270432263612747, 'timestamp': '2025-09-30 22:33:34.228129', 'step': 17279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.261336', 'step': 17279, 'epoch': 3} {'type': 'loss', 'content': 0.10195600241422653, 'timestamp': '2025-09-30 22:33:34.287895', 'step': 17280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:34.323381', 'step': 17280, 'epoch': 3} {'type': 'loss', 'content': 0.048096731305122375, 'timestamp': '2025-09-30 22:33:34.334963', 'step': 17281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:34.365761', 'step': 17281, 'epoch': 3} {'type': 'loss', 'content': 0.07651454955339432, 'timestamp': '2025-09-30 22:33:34.371018', 'step': 17282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:34.412102', 'step': 17282, 'epoch': 3} {'type': 'loss', 'content': 0.09193704277276993, 'timestamp': '2025-09-30 22:33:34.414541', 'step': 17283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:33:34.446201', 'step': 17283, 'epoch': 3} {'type': 'loss', 'content': 0.19391711056232452, 'timestamp': '2025-09-30 22:33:34.471522', 'step': 17284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.530429', 'step': 17284, 'epoch': 3} {'type': 'loss', 'content': 0.06375867128372192, 'timestamp': '2025-09-30 22:33:34.533121', 'step': 17285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:34.563410', 'step': 17285, 'epoch': 3} {'type': 'loss', 'content': 0.09701810032129288, 'timestamp': '2025-09-30 22:33:34.567339', 'step': 17286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:34.600168', 'step': 17286, 'epoch': 3} {'type': 'loss', 'content': 0.1270599067211151, 'timestamp': '2025-09-30 22:33:34.602597', 'step': 17287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:34.634142', 'step': 17287, 'epoch': 3} {'type': 'loss', 'content': 0.022309839725494385, 'timestamp': '2025-09-30 22:33:34.659419', 'step': 17288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.690774', 'step': 17288, 'epoch': 3} {'type': 'loss', 'content': 0.08046628534793854, 'timestamp': '2025-09-30 22:33:34.697087', 'step': 17289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:34.735540', 'step': 17289, 'epoch': 3} {'type': 'loss', 'content': 0.027031339704990387, 'timestamp': '2025-09-30 22:33:34.741296', 'step': 17290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.776590', 'step': 17290, 'epoch': 3} {'type': 'loss', 'content': 0.02229313738644123, 'timestamp': '2025-09-30 22:33:34.780227', 'step': 17291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.810194', 'step': 17291, 'epoch': 3} {'type': 'loss', 'content': 0.05590919032692909, 'timestamp': '2025-09-30 22:33:34.838476', 'step': 17292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:34.878692', 'step': 17292, 'epoch': 3} {'type': 'loss', 'content': 0.10894893854856491, 'timestamp': '2025-09-30 22:33:34.883524', 'step': 17293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:34.917389', 'step': 17293, 'epoch': 3} {'type': 'loss', 'content': 0.055216021835803986, 'timestamp': '2025-09-30 22:33:34.920672', 'step': 17294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:34.962584', 'step': 17294, 'epoch': 3} {'type': 'loss', 'content': 0.03378982096910477, 'timestamp': '2025-09-30 22:33:34.966854', 'step': 17295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:35.014179', 'step': 17295, 'epoch': 3} {'type': 'loss', 'content': 0.0958152562379837, 'timestamp': '2025-09-30 22:33:35.053401', 'step': 17296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.098803', 'step': 17296, 'epoch': 3} {'type': 'loss', 'content': 0.029716119170188904, 'timestamp': '2025-09-30 22:33:35.115531', 'step': 17297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.147748', 'step': 17297, 'epoch': 3} {'type': 'loss', 'content': 0.08694806694984436, 'timestamp': '2025-09-30 22:33:35.153757', 'step': 17298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:35.185888', 'step': 17298, 'epoch': 3} {'type': 'loss', 'content': 0.03920729458332062, 'timestamp': '2025-09-30 22:33:35.190760', 'step': 17299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.239336', 'step': 17299, 'epoch': 3} {'type': 'loss', 'content': 0.03886597231030464, 'timestamp': '2025-09-30 22:33:35.265225', 'step': 17300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:35.310272', 'step': 17300, 'epoch': 3} {'type': 'loss', 'content': 0.07241944223642349, 'timestamp': '2025-09-30 22:33:35.313225', 'step': 17301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:35.344186', 'step': 17301, 'epoch': 3} {'type': 'loss', 'content': 0.07240021228790283, 'timestamp': '2025-09-30 22:33:35.359357', 'step': 17302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:35.392465', 'step': 17302, 'epoch': 3} {'type': 'loss', 'content': 0.08707371354103088, 'timestamp': '2025-09-30 22:33:35.398425', 'step': 17303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:35.438703', 'step': 17303, 'epoch': 3} {'type': 'loss', 'content': 0.08409087359905243, 'timestamp': '2025-09-30 22:33:35.465078', 'step': 17304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:35.499430', 'step': 17304, 'epoch': 3} {'type': 'loss', 'content': 0.04842749610543251, 'timestamp': '2025-09-30 22:33:35.503354', 'step': 17305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:35.542960', 'step': 17305, 'epoch': 3} {'type': 'loss', 'content': 0.040943119674921036, 'timestamp': '2025-09-30 22:33:35.551370', 'step': 17306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.589021', 'step': 17306, 'epoch': 3} {'type': 'loss', 'content': 0.09776894003152847, 'timestamp': '2025-09-30 22:33:35.592877', 'step': 17307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:35.624913', 'step': 17307, 'epoch': 3} {'type': 'loss', 'content': 0.022711915895342827, 'timestamp': '2025-09-30 22:33:35.659928', 'step': 17308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:35.691762', 'step': 17308, 'epoch': 3} {'type': 'loss', 'content': 0.09261316806077957, 'timestamp': '2025-09-30 22:33:35.694287', 'step': 17309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:35.738424', 'step': 17309, 'epoch': 3} {'type': 'loss', 'content': 0.10055451095104218, 'timestamp': '2025-09-30 22:33:35.742612', 'step': 17310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:35.782196', 'step': 17310, 'epoch': 3} {'type': 'loss', 'content': 0.11317506432533264, 'timestamp': '2025-09-30 22:33:35.785843', 'step': 17311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.820067', 'step': 17311, 'epoch': 3} {'type': 'loss', 'content': 0.09216247498989105, 'timestamp': '2025-09-30 22:33:35.844610', 'step': 17312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.876085', 'step': 17312, 'epoch': 3} {'type': 'loss', 'content': 0.0778651088476181, 'timestamp': '2025-09-30 22:33:35.882510', 'step': 17313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:35.917486', 'step': 17313, 'epoch': 3} {'type': 'loss', 'content': 0.03639853373169899, 'timestamp': '2025-09-30 22:33:35.920699', 'step': 17314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.961542', 'step': 17314, 'epoch': 3} {'type': 'loss', 'content': 0.08311372995376587, 'timestamp': '2025-09-30 22:33:35.964229', 'step': 17315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:35.998373', 'step': 17315, 'epoch': 3} {'type': 'loss', 'content': 0.06933176517486572, 'timestamp': '2025-09-30 22:33:36.024885', 'step': 17316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.061948', 'step': 17316, 'epoch': 3} {'type': 'loss', 'content': 0.04367172345519066, 'timestamp': '2025-09-30 22:33:36.067839', 'step': 17317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:36.109078', 'step': 17317, 'epoch': 3} {'type': 'loss', 'content': 0.027456525713205338, 'timestamp': '2025-09-30 22:33:36.115519', 'step': 17318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:36.153743', 'step': 17318, 'epoch': 3} {'type': 'loss', 'content': 0.09986225515604019, 'timestamp': '2025-09-30 22:33:36.159770', 'step': 17319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:36.191455', 'step': 17319, 'epoch': 3} {'type': 'loss', 'content': 0.05923936143517494, 'timestamp': '2025-09-30 22:33:36.217555', 'step': 17320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.251093', 'step': 17320, 'epoch': 3} {'type': 'loss', 'content': 0.08657518774271011, 'timestamp': '2025-09-30 22:33:36.253863', 'step': 17321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:36.285265', 'step': 17321, 'epoch': 3} {'type': 'loss', 'content': 0.08115839958190918, 'timestamp': '2025-09-30 22:33:36.288487', 'step': 17322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.325076', 'step': 17322, 'epoch': 3} {'type': 'loss', 'content': 0.043556708842515945, 'timestamp': '2025-09-30 22:33:36.331465', 'step': 17323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.371356', 'step': 17323, 'epoch': 3} {'type': 'loss', 'content': 0.0652514398097992, 'timestamp': '2025-09-30 22:33:36.395871', 'step': 17324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:36.431171', 'step': 17324, 'epoch': 3} {'type': 'loss', 'content': 0.04189852625131607, 'timestamp': '2025-09-30 22:33:36.435680', 'step': 17325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.469096', 'step': 17325, 'epoch': 3} {'type': 'loss', 'content': 0.028343958780169487, 'timestamp': '2025-09-30 22:33:36.473955', 'step': 17326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.507012', 'step': 17326, 'epoch': 3} {'type': 'loss', 'content': 0.057048238813877106, 'timestamp': '2025-09-30 22:33:36.511408', 'step': 17327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:36.549774', 'step': 17327, 'epoch': 3} {'type': 'loss', 'content': 0.016223477199673653, 'timestamp': '2025-09-30 22:33:36.574630', 'step': 17328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:36.614361', 'step': 17328, 'epoch': 3} {'type': 'loss', 'content': 0.12744766473770142, 'timestamp': '2025-09-30 22:33:36.618492', 'step': 17329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:36.651375', 'step': 17329, 'epoch': 3} {'type': 'loss', 'content': 0.07046637684106827, 'timestamp': '2025-09-30 22:33:36.662243', 'step': 17330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:36.709515', 'step': 17330, 'epoch': 3} {'type': 'loss', 'content': 0.09944786131381989, 'timestamp': '2025-09-30 22:33:36.715332', 'step': 17331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:36.752374', 'step': 17331, 'epoch': 3} {'type': 'loss', 'content': 0.08869779855012894, 'timestamp': '2025-09-30 22:33:36.781613', 'step': 17332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:36.813648', 'step': 17332, 'epoch': 3} {'type': 'loss', 'content': 0.022464966401457787, 'timestamp': '2025-09-30 22:33:36.818706', 'step': 17333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:36.852229', 'step': 17333, 'epoch': 3} {'type': 'loss', 'content': 0.08971557021141052, 'timestamp': '2025-09-30 22:33:36.859451', 'step': 17334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:36.897524', 'step': 17334, 'epoch': 3} {'type': 'loss', 'content': 0.05409396067261696, 'timestamp': '2025-09-30 22:33:36.901695', 'step': 17335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:36.940274', 'step': 17335, 'epoch': 3} {'type': 'loss', 'content': 0.12432833015918732, 'timestamp': '2025-09-30 22:33:36.967197', 'step': 17336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:36.998810', 'step': 17336, 'epoch': 3} {'type': 'loss', 'content': 0.09904219955205917, 'timestamp': '2025-09-30 22:33:37.007505', 'step': 17337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:37.044816', 'step': 17337, 'epoch': 3} {'type': 'loss', 'content': 0.08011405169963837, 'timestamp': '2025-09-30 22:33:37.060017', 'step': 17338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.105644', 'step': 17338, 'epoch': 3} {'type': 'loss', 'content': 0.05903848260641098, 'timestamp': '2025-09-30 22:33:37.108856', 'step': 17339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.141165', 'step': 17339, 'epoch': 3} {'type': 'loss', 'content': 0.07541148364543915, 'timestamp': '2025-09-30 22:33:37.165941', 'step': 17340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:37.199775', 'step': 17340, 'epoch': 3} {'type': 'loss', 'content': 0.06493666768074036, 'timestamp': '2025-09-30 22:33:37.204908', 'step': 17341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:37.235366', 'step': 17341, 'epoch': 3} {'type': 'loss', 'content': 0.08074898272752762, 'timestamp': '2025-09-30 22:33:37.240166', 'step': 17342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:37.284615', 'step': 17342, 'epoch': 3} {'type': 'loss', 'content': 0.10142364352941513, 'timestamp': '2025-09-30 22:33:37.288487', 'step': 17343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:37.321937', 'step': 17343, 'epoch': 3} {'type': 'loss', 'content': 0.0604008249938488, 'timestamp': '2025-09-30 22:33:37.347535', 'step': 17344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.378793', 'step': 17344, 'epoch': 3} {'type': 'loss', 'content': 0.10320647060871124, 'timestamp': '2025-09-30 22:33:37.388843', 'step': 17345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.421105', 'step': 17345, 'epoch': 3} {'type': 'loss', 'content': 0.110809326171875, 'timestamp': '2025-09-30 22:33:37.423582', 'step': 17346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.457532', 'step': 17346, 'epoch': 3} {'type': 'loss', 'content': 0.02555946260690689, 'timestamp': '2025-09-30 22:33:37.463221', 'step': 17347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:37.496965', 'step': 17347, 'epoch': 3} {'type': 'loss', 'content': 0.05884675309062004, 'timestamp': '2025-09-30 22:33:37.522733', 'step': 17348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.560130', 'step': 17348, 'epoch': 3} {'type': 'loss', 'content': 0.04629112407565117, 'timestamp': '2025-09-30 22:33:37.568795', 'step': 17349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:37.599940', 'step': 17349, 'epoch': 3} {'type': 'loss', 'content': 0.07121112197637558, 'timestamp': '2025-09-30 22:33:37.604121', 'step': 17350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.636269', 'step': 17350, 'epoch': 3} {'type': 'loss', 'content': 0.07172150164842606, 'timestamp': '2025-09-30 22:33:37.641751', 'step': 17351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.679756', 'step': 17351, 'epoch': 3} {'type': 'loss', 'content': 0.07349216938018799, 'timestamp': '2025-09-30 22:33:37.707680', 'step': 17352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.743936', 'step': 17352, 'epoch': 3} {'type': 'loss', 'content': 0.048546694219112396, 'timestamp': '2025-09-30 22:33:37.747439', 'step': 17353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:37.781678', 'step': 17353, 'epoch': 3} {'type': 'loss', 'content': 0.0838441327214241, 'timestamp': '2025-09-30 22:33:37.785242', 'step': 17354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.818167', 'step': 17354, 'epoch': 3} {'type': 'loss', 'content': 0.1653493344783783, 'timestamp': '2025-09-30 22:33:37.829263', 'step': 17355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.860794', 'step': 17355, 'epoch': 3} {'type': 'loss', 'content': 0.07380618900060654, 'timestamp': '2025-09-30 22:33:37.895332', 'step': 17356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.937217', 'step': 17356, 'epoch': 3} {'type': 'loss', 'content': 0.03261454403400421, 'timestamp': '2025-09-30 22:33:37.943497', 'step': 17357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:37.977284', 'step': 17357, 'epoch': 3} {'type': 'loss', 'content': 0.09031915664672852, 'timestamp': '2025-09-30 22:33:37.981442', 'step': 17358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.015279', 'step': 17358, 'epoch': 3} {'type': 'loss', 'content': 0.04534982517361641, 'timestamp': '2025-09-30 22:33:38.026814', 'step': 17359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.058195', 'step': 17359, 'epoch': 3} {'type': 'loss', 'content': 0.10430096089839935, 'timestamp': '2025-09-30 22:33:38.083714', 'step': 17360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.116362', 'step': 17360, 'epoch': 3} {'type': 'loss', 'content': 0.07033195346593857, 'timestamp': '2025-09-30 22:33:38.120495', 'step': 17361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.154137', 'step': 17361, 'epoch': 3} {'type': 'loss', 'content': 0.09024862200021744, 'timestamp': '2025-09-30 22:33:38.161685', 'step': 17362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:38.201273', 'step': 17362, 'epoch': 3} {'type': 'loss', 'content': 0.04194869473576546, 'timestamp': '2025-09-30 22:33:38.208854', 'step': 17363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.241610', 'step': 17363, 'epoch': 3} {'type': 'loss', 'content': 0.08595841377973557, 'timestamp': '2025-09-30 22:33:38.268904', 'step': 17364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:38.299366', 'step': 17364, 'epoch': 3} {'type': 'loss', 'content': 0.003481591120362282, 'timestamp': '2025-09-30 22:33:38.304511', 'step': 17365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.338164', 'step': 17365, 'epoch': 3} {'type': 'loss', 'content': 0.0948425754904747, 'timestamp': '2025-09-30 22:33:38.343437', 'step': 17366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:38.378361', 'step': 17366, 'epoch': 3} {'type': 'loss', 'content': 0.03468848019838333, 'timestamp': '2025-09-30 22:33:38.395066', 'step': 17367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.427909', 'step': 17367, 'epoch': 3} {'type': 'loss', 'content': 0.039448317140340805, 'timestamp': '2025-09-30 22:33:38.463727', 'step': 17368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.496367', 'step': 17368, 'epoch': 3} {'type': 'loss', 'content': 0.06043202057480812, 'timestamp': '2025-09-30 22:33:38.500384', 'step': 17369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:38.545713', 'step': 17369, 'epoch': 3} {'type': 'loss', 'content': 0.09018528461456299, 'timestamp': '2025-09-30 22:33:38.547974', 'step': 17370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.579661', 'step': 17370, 'epoch': 3} {'type': 'loss', 'content': 0.03509968891739845, 'timestamp': '2025-09-30 22:33:38.593142', 'step': 17371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.633817', 'step': 17371, 'epoch': 3} {'type': 'loss', 'content': 0.07336747646331787, 'timestamp': '2025-09-30 22:33:38.659212', 'step': 17372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.689621', 'step': 17372, 'epoch': 3} {'type': 'loss', 'content': 0.14849847555160522, 'timestamp': '2025-09-30 22:33:38.692625', 'step': 17373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:38.723329', 'step': 17373, 'epoch': 3} {'type': 'loss', 'content': 0.024234255775809288, 'timestamp': '2025-09-30 22:33:38.731590', 'step': 17374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.762534', 'step': 17374, 'epoch': 3} {'type': 'loss', 'content': 0.07085621356964111, 'timestamp': '2025-09-30 22:33:38.777997', 'step': 17375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:38.811044', 'step': 17375, 'epoch': 3} {'type': 'loss', 'content': 0.057965341955423355, 'timestamp': '2025-09-30 22:33:38.837330', 'step': 17376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.868273', 'step': 17376, 'epoch': 3} {'type': 'loss', 'content': 0.05774342268705368, 'timestamp': '2025-09-30 22:33:38.870994', 'step': 17377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.902636', 'step': 17377, 'epoch': 3} {'type': 'loss', 'content': 0.03523632511496544, 'timestamp': '2025-09-30 22:33:38.905982', 'step': 17378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.937562', 'step': 17378, 'epoch': 3} {'type': 'loss', 'content': 0.043501656502485275, 'timestamp': '2025-09-30 22:33:38.939900', 'step': 17379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:38.971342', 'step': 17379, 'epoch': 3} {'type': 'loss', 'content': 0.07924476265907288, 'timestamp': '2025-09-30 22:33:38.997441', 'step': 17380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:39.027789', 'step': 17380, 'epoch': 3} {'type': 'loss', 'content': 0.061160650104284286, 'timestamp': '2025-09-30 22:33:39.030816', 'step': 17381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.061242', 'step': 17381, 'epoch': 3} {'type': 'loss', 'content': 0.06412345916032791, 'timestamp': '2025-09-30 22:33:39.064871', 'step': 17382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.101735', 'step': 17382, 'epoch': 3} {'type': 'loss', 'content': 0.06514454632997513, 'timestamp': '2025-09-30 22:33:39.104812', 'step': 17383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.144643', 'step': 17383, 'epoch': 3} {'type': 'loss', 'content': 0.04737911745905876, 'timestamp': '2025-09-30 22:33:39.170879', 'step': 17384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:39.212738', 'step': 17384, 'epoch': 3} {'type': 'loss', 'content': 0.11247818917036057, 'timestamp': '2025-09-30 22:33:39.219942', 'step': 17385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.257341', 'step': 17385, 'epoch': 3} {'type': 'loss', 'content': 0.044816046953201294, 'timestamp': '2025-09-30 22:33:39.261924', 'step': 17386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:39.292460', 'step': 17386, 'epoch': 3} {'type': 'loss', 'content': 0.03579173609614372, 'timestamp': '2025-09-30 22:33:39.305951', 'step': 17387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:39.336942', 'step': 17387, 'epoch': 3} {'type': 'loss', 'content': 0.14514069259166718, 'timestamp': '2025-09-30 22:33:39.372900', 'step': 17388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:39.404169', 'step': 17388, 'epoch': 3} {'type': 'loss', 'content': 0.03600876405835152, 'timestamp': '2025-09-30 22:33:39.409069', 'step': 17389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.449725', 'step': 17389, 'epoch': 3} {'type': 'loss', 'content': 0.19654783606529236, 'timestamp': '2025-09-30 22:33:39.454140', 'step': 17390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:39.491591', 'step': 17390, 'epoch': 3} {'type': 'loss', 'content': 0.08012369275093079, 'timestamp': '2025-09-30 22:33:39.495971', 'step': 17391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.527450', 'step': 17391, 'epoch': 3} {'type': 'loss', 'content': 0.09009531140327454, 'timestamp': '2025-09-30 22:33:39.552784', 'step': 17392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:39.583620', 'step': 17392, 'epoch': 3} {'type': 'loss', 'content': 0.030657578259706497, 'timestamp': '2025-09-30 22:33:39.586225', 'step': 17393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:39.617767', 'step': 17393, 'epoch': 3} {'type': 'loss', 'content': 0.11529019474983215, 'timestamp': '2025-09-30 22:33:39.624251', 'step': 17394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:39.665060', 'step': 17394, 'epoch': 3} {'type': 'loss', 'content': 0.04417317733168602, 'timestamp': '2025-09-30 22:33:39.669994', 'step': 17395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:39.702366', 'step': 17395, 'epoch': 3} {'type': 'loss', 'content': 0.10514211654663086, 'timestamp': '2025-09-30 22:33:39.740975', 'step': 17396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:39.773262', 'step': 17396, 'epoch': 3} {'type': 'loss', 'content': 0.09494838863611221, 'timestamp': '2025-09-30 22:33:39.777606', 'step': 17397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:39.808913', 'step': 17397, 'epoch': 3} {'type': 'loss', 'content': 0.02426343411207199, 'timestamp': '2025-09-30 22:33:39.821091', 'step': 17398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:39.866457', 'step': 17398, 'epoch': 3} {'type': 'loss', 'content': 0.051559194922447205, 'timestamp': '2025-09-30 22:33:39.871224', 'step': 17399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:39.904445', 'step': 17399, 'epoch': 3} {'type': 'loss', 'content': 0.03590220957994461, 'timestamp': '2025-09-30 22:33:39.930473', 'step': 17400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:39.966535', 'step': 17400, 'epoch': 3} {'type': 'loss', 'content': 0.11284737288951874, 'timestamp': '2025-09-30 22:33:39.970357', 'step': 17401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:40.003682', 'step': 17401, 'epoch': 3} {'type': 'loss', 'content': 0.04534417763352394, 'timestamp': '2025-09-30 22:33:40.019352', 'step': 17402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:40.050854', 'step': 17402, 'epoch': 3} {'type': 'loss', 'content': 0.13820181787014008, 'timestamp': '2025-09-30 22:33:40.054542', 'step': 17403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.089169', 'step': 17403, 'epoch': 3} {'type': 'loss', 'content': 0.06713554263114929, 'timestamp': '2025-09-30 22:33:40.114974', 'step': 17404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:40.151934', 'step': 17404, 'epoch': 3} {'type': 'loss', 'content': 0.07262948900461197, 'timestamp': '2025-09-30 22:33:40.157639', 'step': 17405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:40.191911', 'step': 17405, 'epoch': 3} {'type': 'loss', 'content': 0.07265859842300415, 'timestamp': '2025-09-30 22:33:40.196734', 'step': 17406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:40.236162', 'step': 17406, 'epoch': 3} {'type': 'loss', 'content': 0.04956994205713272, 'timestamp': '2025-09-30 22:33:40.240294', 'step': 17407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:40.285567', 'step': 17407, 'epoch': 3} {'type': 'loss', 'content': 0.06463602930307388, 'timestamp': '2025-09-30 22:33:40.311443', 'step': 17408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.354321', 'step': 17408, 'epoch': 3} {'type': 'loss', 'content': 0.0606965646147728, 'timestamp': '2025-09-30 22:33:40.371792', 'step': 17409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:40.404282', 'step': 17409, 'epoch': 3} {'type': 'loss', 'content': 0.0391898937523365, 'timestamp': '2025-09-30 22:33:40.409411', 'step': 17410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:40.441723', 'step': 17410, 'epoch': 3} {'type': 'loss', 'content': 0.10982409864664078, 'timestamp': '2025-09-30 22:33:40.447910', 'step': 17411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:40.489942', 'step': 17411, 'epoch': 3} {'type': 'loss', 'content': 0.09020011872053146, 'timestamp': '2025-09-30 22:33:40.514418', 'step': 17412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:40.547586', 'step': 17412, 'epoch': 3} {'type': 'loss', 'content': 0.020441461354494095, 'timestamp': '2025-09-30 22:33:40.551459', 'step': 17413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.592824', 'step': 17413, 'epoch': 3} {'type': 'loss', 'content': 0.14408420026302338, 'timestamp': '2025-09-30 22:33:40.596346', 'step': 17414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.636798', 'step': 17414, 'epoch': 3} {'type': 'loss', 'content': 0.08942748606204987, 'timestamp': '2025-09-30 22:33:40.642299', 'step': 17415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:40.675864', 'step': 17415, 'epoch': 3} {'type': 'loss', 'content': 0.07577413320541382, 'timestamp': '2025-09-30 22:33:40.703767', 'step': 17416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.744575', 'step': 17416, 'epoch': 3} {'type': 'loss', 'content': 0.03603362664580345, 'timestamp': '2025-09-30 22:33:40.763736', 'step': 17417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:40.802354', 'step': 17417, 'epoch': 3} {'type': 'loss', 'content': 0.09840215742588043, 'timestamp': '2025-09-30 22:33:40.807530', 'step': 17418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.840364', 'step': 17418, 'epoch': 3} {'type': 'loss', 'content': 0.04443791136145592, 'timestamp': '2025-09-30 22:33:40.844535', 'step': 17419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:40.879899', 'step': 17419, 'epoch': 3} {'type': 'loss', 'content': 0.07427123934030533, 'timestamp': '2025-09-30 22:33:40.905715', 'step': 17420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.944473', 'step': 17420, 'epoch': 3} {'type': 'loss', 'content': 0.08380041271448135, 'timestamp': '2025-09-30 22:33:40.948850', 'step': 17421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:40.982282', 'step': 17421, 'epoch': 3} {'type': 'loss', 'content': 0.09007415920495987, 'timestamp': '2025-09-30 22:33:40.985959', 'step': 17422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.027186', 'step': 17422, 'epoch': 3} {'type': 'loss', 'content': 0.04309665411710739, 'timestamp': '2025-09-30 22:33:41.032626', 'step': 17423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:41.079075', 'step': 17423, 'epoch': 3} {'type': 'loss', 'content': 0.035952214151620865, 'timestamp': '2025-09-30 22:33:41.106975', 'step': 17424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:41.150414', 'step': 17424, 'epoch': 3} {'type': 'loss', 'content': 0.06213974207639694, 'timestamp': '2025-09-30 22:33:41.154957', 'step': 17425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.186106', 'step': 17425, 'epoch': 3} {'type': 'loss', 'content': 0.0861251950263977, 'timestamp': '2025-09-30 22:33:41.191911', 'step': 17426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.227393', 'step': 17426, 'epoch': 3} {'type': 'loss', 'content': 0.06933712959289551, 'timestamp': '2025-09-30 22:33:41.239121', 'step': 17427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.278476', 'step': 17427, 'epoch': 3} {'type': 'loss', 'content': 0.1331060528755188, 'timestamp': '2025-09-30 22:33:41.312975', 'step': 17428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:41.344304', 'step': 17428, 'epoch': 3} {'type': 'loss', 'content': 0.02972741611301899, 'timestamp': '2025-09-30 22:33:41.349102', 'step': 17429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.393040', 'step': 17429, 'epoch': 3} {'type': 'loss', 'content': 0.04762019217014313, 'timestamp': '2025-09-30 22:33:41.397529', 'step': 17430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.436529', 'step': 17430, 'epoch': 3} {'type': 'loss', 'content': 0.07988258451223373, 'timestamp': '2025-09-30 22:33:41.455240', 'step': 17431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:41.495505', 'step': 17431, 'epoch': 3} {'type': 'loss', 'content': 0.04643753543496132, 'timestamp': '2025-09-30 22:33:41.523310', 'step': 17432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.556746', 'step': 17432, 'epoch': 3} {'type': 'loss', 'content': 0.11946342140436172, 'timestamp': '2025-09-30 22:33:41.559863', 'step': 17433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.600005', 'step': 17433, 'epoch': 3} {'type': 'loss', 'content': 0.07052793353796005, 'timestamp': '2025-09-30 22:33:41.606193', 'step': 17434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.640460', 'step': 17434, 'epoch': 3} {'type': 'loss', 'content': 0.09703825414180756, 'timestamp': '2025-09-30 22:33:41.657371', 'step': 17435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.688278', 'step': 17435, 'epoch': 3} {'type': 'loss', 'content': 0.057578761130571365, 'timestamp': '2025-09-30 22:33:41.714807', 'step': 17436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.748934', 'step': 17436, 'epoch': 3} {'type': 'loss', 'content': 0.06863486021757126, 'timestamp': '2025-09-30 22:33:41.752335', 'step': 17437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.786113', 'step': 17437, 'epoch': 3} {'type': 'loss', 'content': 0.06289712339639664, 'timestamp': '2025-09-30 22:33:41.789017', 'step': 17438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:41.823138', 'step': 17438, 'epoch': 3} {'type': 'loss', 'content': 0.07663300633430481, 'timestamp': '2025-09-30 22:33:41.834259', 'step': 17439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:41.866624', 'step': 17439, 'epoch': 3} {'type': 'loss', 'content': 0.03357624262571335, 'timestamp': '2025-09-30 22:33:41.891263', 'step': 17440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:41.922641', 'step': 17440, 'epoch': 3} {'type': 'loss', 'content': 0.07883264869451523, 'timestamp': '2025-09-30 22:33:41.926458', 'step': 17441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:41.958665', 'step': 17441, 'epoch': 3} {'type': 'loss', 'content': 0.055438172072172165, 'timestamp': '2025-09-30 22:33:41.972786', 'step': 17442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.011976', 'step': 17442, 'epoch': 3} {'type': 'loss', 'content': 0.02995184250175953, 'timestamp': '2025-09-30 22:33:42.023014', 'step': 17443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.060575', 'step': 17443, 'epoch': 3} {'type': 'loss', 'content': 0.05495074763894081, 'timestamp': '2025-09-30 22:33:42.085733', 'step': 17444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:42.118336', 'step': 17444, 'epoch': 3} {'type': 'loss', 'content': 0.03196290135383606, 'timestamp': '2025-09-30 22:33:42.124471', 'step': 17445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.162021', 'step': 17445, 'epoch': 3} {'type': 'loss', 'content': 0.06066584587097168, 'timestamp': '2025-09-30 22:33:42.177354', 'step': 17446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:42.208103', 'step': 17446, 'epoch': 3} {'type': 'loss', 'content': 0.04252760112285614, 'timestamp': '2025-09-30 22:33:42.213298', 'step': 17447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.248880', 'step': 17447, 'epoch': 3} {'type': 'loss', 'content': 0.0838005542755127, 'timestamp': '2025-09-30 22:33:42.284210', 'step': 17448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:42.316838', 'step': 17448, 'epoch': 3} {'type': 'loss', 'content': 0.0855630412697792, 'timestamp': '2025-09-30 22:33:42.321349', 'step': 17449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.359577', 'step': 17449, 'epoch': 3} {'type': 'loss', 'content': 0.09443269670009613, 'timestamp': '2025-09-30 22:33:42.363666', 'step': 17450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:42.401364', 'step': 17450, 'epoch': 3} {'type': 'loss', 'content': 0.11982917785644531, 'timestamp': '2025-09-30 22:33:42.405974', 'step': 17451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.454577', 'step': 17451, 'epoch': 3} {'type': 'loss', 'content': 0.06146512180566788, 'timestamp': '2025-09-30 22:33:42.488481', 'step': 17452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.530161', 'step': 17452, 'epoch': 3} {'type': 'loss', 'content': 0.04757172241806984, 'timestamp': '2025-09-30 22:33:42.544594', 'step': 17453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.577868', 'step': 17453, 'epoch': 3} {'type': 'loss', 'content': 0.07824014872312546, 'timestamp': '2025-09-30 22:33:42.580684', 'step': 17454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.612771', 'step': 17454, 'epoch': 3} {'type': 'loss', 'content': 0.1456959843635559, 'timestamp': '2025-09-30 22:33:42.626791', 'step': 17455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.658806', 'step': 17455, 'epoch': 3} {'type': 'loss', 'content': 0.08799951523542404, 'timestamp': '2025-09-30 22:33:42.684082', 'step': 17456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.717548', 'step': 17456, 'epoch': 3} {'type': 'loss', 'content': 0.019886475056409836, 'timestamp': '2025-09-30 22:33:42.728055', 'step': 17457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.768005', 'step': 17457, 'epoch': 3} {'type': 'loss', 'content': 0.0817398801445961, 'timestamp': '2025-09-30 22:33:42.771399', 'step': 17458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.802644', 'step': 17458, 'epoch': 3} {'type': 'loss', 'content': 0.05309533327817917, 'timestamp': '2025-09-30 22:33:42.811836', 'step': 17459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.843768', 'step': 17459, 'epoch': 3} {'type': 'loss', 'content': 0.05193489044904709, 'timestamp': '2025-09-30 22:33:42.869838', 'step': 17460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:42.902857', 'step': 17460, 'epoch': 3} {'type': 'loss', 'content': 0.09592887759208679, 'timestamp': '2025-09-30 22:33:42.909772', 'step': 17461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:42.942106', 'step': 17461, 'epoch': 3} {'type': 'loss', 'content': 0.058296434581279755, 'timestamp': '2025-09-30 22:33:42.955800', 'step': 17462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:42.991252', 'step': 17462, 'epoch': 3} {'type': 'loss', 'content': 0.038425419479608536, 'timestamp': '2025-09-30 22:33:42.994582', 'step': 17463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:43.027232', 'step': 17463, 'epoch': 3} {'type': 'loss', 'content': 0.06883444637060165, 'timestamp': '2025-09-30 22:33:43.052564', 'step': 17464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:43.092299', 'step': 17464, 'epoch': 3} {'type': 'loss', 'content': 0.07428308576345444, 'timestamp': '2025-09-30 22:33:43.099503', 'step': 17465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.141625', 'step': 17465, 'epoch': 3} {'type': 'loss', 'content': 0.05475471913814545, 'timestamp': '2025-09-30 22:33:43.145859', 'step': 17466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:43.176373', 'step': 17466, 'epoch': 3} {'type': 'loss', 'content': 0.0852927565574646, 'timestamp': '2025-09-30 22:33:43.183858', 'step': 17467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.214639', 'step': 17467, 'epoch': 3} {'type': 'loss', 'content': 0.125757098197937, 'timestamp': '2025-09-30 22:33:43.238775', 'step': 17468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.277353', 'step': 17468, 'epoch': 3} {'type': 'loss', 'content': 0.07364930957555771, 'timestamp': '2025-09-30 22:33:43.281311', 'step': 17469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:43.324617', 'step': 17469, 'epoch': 3} {'type': 'loss', 'content': 0.05714472383260727, 'timestamp': '2025-09-30 22:33:43.327548', 'step': 17470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:43.371622', 'step': 17470, 'epoch': 3} {'type': 'loss', 'content': 0.10169311612844467, 'timestamp': '2025-09-30 22:33:43.375118', 'step': 17471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:43.406345', 'step': 17471, 'epoch': 3} {'type': 'loss', 'content': 0.03669020161032677, 'timestamp': '2025-09-30 22:33:43.432475', 'step': 17472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:43.473443', 'step': 17472, 'epoch': 3} {'type': 'loss', 'content': 0.09660770744085312, 'timestamp': '2025-09-30 22:33:43.478239', 'step': 17473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:43.517461', 'step': 17473, 'epoch': 3} {'type': 'loss', 'content': 0.08270791172981262, 'timestamp': '2025-09-30 22:33:43.529248', 'step': 17474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.575290', 'step': 17474, 'epoch': 3} {'type': 'loss', 'content': 0.06695356965065002, 'timestamp': '2025-09-30 22:33:43.584912', 'step': 17475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:43.632797', 'step': 17475, 'epoch': 3} {'type': 'loss', 'content': 0.05040637031197548, 'timestamp': '2025-09-30 22:33:43.659803', 'step': 17476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:43.696694', 'step': 17476, 'epoch': 3} {'type': 'loss', 'content': 0.03843875974416733, 'timestamp': '2025-09-30 22:33:43.701962', 'step': 17477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:43.742230', 'step': 17477, 'epoch': 3} {'type': 'loss', 'content': 0.06868112832307816, 'timestamp': '2025-09-30 22:33:43.747693', 'step': 17478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.782561', 'step': 17478, 'epoch': 3} {'type': 'loss', 'content': 0.05212043970823288, 'timestamp': '2025-09-30 22:33:43.787356', 'step': 17479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.820335', 'step': 17479, 'epoch': 3} {'type': 'loss', 'content': 0.06652573496103287, 'timestamp': '2025-09-30 22:33:43.846764', 'step': 17480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:43.885721', 'step': 17480, 'epoch': 3} {'type': 'loss', 'content': 0.026179559528827667, 'timestamp': '2025-09-30 22:33:43.896504', 'step': 17481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:43.935414', 'step': 17481, 'epoch': 3} {'type': 'loss', 'content': 0.023212693631649017, 'timestamp': '2025-09-30 22:33:43.950252', 'step': 17482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:43.983964', 'step': 17482, 'epoch': 3} {'type': 'loss', 'content': 0.0665484219789505, 'timestamp': '2025-09-30 22:33:43.990440', 'step': 17483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:44.023584', 'step': 17483, 'epoch': 3} {'type': 'loss', 'content': 0.0032892695162445307, 'timestamp': '2025-09-30 22:33:44.058871', 'step': 17484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:44.092035', 'step': 17484, 'epoch': 3} {'type': 'loss', 'content': 0.06687553972005844, 'timestamp': '2025-09-30 22:33:44.105052', 'step': 17485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:44.138854', 'step': 17485, 'epoch': 3} {'type': 'loss', 'content': 0.028174402192234993, 'timestamp': '2025-09-30 22:33:44.143453', 'step': 17486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:44.174803', 'step': 17486, 'epoch': 3} {'type': 'loss', 'content': 0.0969206839799881, 'timestamp': '2025-09-30 22:33:44.178801', 'step': 17487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:44.210918', 'step': 17487, 'epoch': 3} {'type': 'loss', 'content': 0.10908393561840057, 'timestamp': '2025-09-30 22:33:44.235744', 'step': 17488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:44.275851', 'step': 17488, 'epoch': 3} {'type': 'loss', 'content': 0.055863428860902786, 'timestamp': '2025-09-30 22:33:44.279379', 'step': 17489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:44.312529', 'step': 17489, 'epoch': 3} {'type': 'loss', 'content': 0.0581946074962616, 'timestamp': '2025-09-30 22:33:44.315502', 'step': 17490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:44.355327', 'step': 17490, 'epoch': 3} {'type': 'loss', 'content': 0.08575975894927979, 'timestamp': '2025-09-30 22:33:44.360668', 'step': 17491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:44.393862', 'step': 17491, 'epoch': 3} {'type': 'loss', 'content': 0.04348110407590866, 'timestamp': '2025-09-30 22:33:44.418589', 'step': 17492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:44.454537', 'step': 17492, 'epoch': 3} {'type': 'loss', 'content': 0.047741785645484924, 'timestamp': '2025-09-30 22:33:44.464440', 'step': 17493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:44.495719', 'step': 17493, 'epoch': 3} {'type': 'loss', 'content': 0.0434822216629982, 'timestamp': '2025-09-30 22:33:44.503690', 'step': 17494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:33:44.541744', 'step': 17494, 'epoch': 3} {'type': 'loss', 'content': 0.04755357652902603, 'timestamp': '2025-09-30 22:33:44.546095', 'step': 17495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:44.577057', 'step': 17495, 'epoch': 3} {'type': 'loss', 'content': 0.04280833154916763, 'timestamp': '2025-09-30 22:33:44.604209', 'step': 17496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:44.642022', 'step': 17496, 'epoch': 3} {'type': 'loss', 'content': 0.03347361460328102, 'timestamp': '2025-09-30 22:33:44.651215', 'step': 17497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:44.684436', 'step': 17497, 'epoch': 3} {'type': 'loss', 'content': 0.11130709946155548, 'timestamp': '2025-09-30 22:33:44.688473', 'step': 17498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:44.728600', 'step': 17498, 'epoch': 3} {'type': 'loss', 'content': 0.06341534107923508, 'timestamp': '2025-09-30 22:33:44.735084', 'step': 17499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:44.766759', 'step': 17499, 'epoch': 3} {'type': 'loss', 'content': 0.02282540313899517, 'timestamp': '2025-09-30 22:33:44.790539', 'step': 17500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 17500', 'timestamp': '2025-09-30 22:33:50.206722', 'step': 17500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:50.257678', 'step': 17500, 'epoch': 3} {'type': 'loss', 'content': 0.07122663408517838, 'timestamp': '2025-09-30 22:33:50.276874', 'step': 17501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:50.310456', 'step': 17501, 'epoch': 3} {'type': 'loss', 'content': 0.06784150004386902, 'timestamp': '2025-09-30 22:33:50.328853', 'step': 17502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:50.369639', 'step': 17502, 'epoch': 3} {'type': 'loss', 'content': 0.11159608513116837, 'timestamp': '2025-09-30 22:33:50.379899', 'step': 17503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.425299', 'step': 17503, 'epoch': 3} {'type': 'loss', 'content': 0.0862668976187706, 'timestamp': '2025-09-30 22:33:50.449868', 'step': 17504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:50.481785', 'step': 17504, 'epoch': 3} {'type': 'loss', 'content': 0.06439922749996185, 'timestamp': '2025-09-30 22:33:50.487521', 'step': 17505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:50.532451', 'step': 17505, 'epoch': 3} {'type': 'loss', 'content': 0.044883135706186295, 'timestamp': '2025-09-30 22:33:50.538753', 'step': 17506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.573477', 'step': 17506, 'epoch': 3} {'type': 'loss', 'content': 0.10364705324172974, 'timestamp': '2025-09-30 22:33:50.588274', 'step': 17507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.632006', 'step': 17507, 'epoch': 3} {'type': 'loss', 'content': 0.08430106937885284, 'timestamp': '2025-09-30 22:33:50.656990', 'step': 17508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.688833', 'step': 17508, 'epoch': 3} {'type': 'loss', 'content': 0.05341511592268944, 'timestamp': '2025-09-30 22:33:50.692182', 'step': 17509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:50.725817', 'step': 17509, 'epoch': 3} {'type': 'loss', 'content': 0.024397848173975945, 'timestamp': '2025-09-30 22:33:50.729237', 'step': 17510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:50.763323', 'step': 17510, 'epoch': 3} {'type': 'loss', 'content': 0.05960797145962715, 'timestamp': '2025-09-30 22:33:50.767875', 'step': 17511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.803883', 'step': 17511, 'epoch': 3} {'type': 'loss', 'content': 0.011718781664967537, 'timestamp': '2025-09-30 22:33:50.829247', 'step': 17512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:50.863963', 'step': 17512, 'epoch': 3} {'type': 'loss', 'content': 0.06968015432357788, 'timestamp': '2025-09-30 22:33:50.869014', 'step': 17513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.905983', 'step': 17513, 'epoch': 3} {'type': 'loss', 'content': 0.05115709826350212, 'timestamp': '2025-09-30 22:33:50.909054', 'step': 17514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.943778', 'step': 17514, 'epoch': 3} {'type': 'loss', 'content': 0.026621634140610695, 'timestamp': '2025-09-30 22:33:50.947473', 'step': 17515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:50.978160', 'step': 17515, 'epoch': 3} {'type': 'loss', 'content': 0.025008071213960648, 'timestamp': '2025-09-30 22:33:51.003524', 'step': 17516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:51.035725', 'step': 17516, 'epoch': 3} {'type': 'loss', 'content': 0.02150295488536358, 'timestamp': '2025-09-30 22:33:51.039520', 'step': 17517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:51.074513', 'step': 17517, 'epoch': 3} {'type': 'loss', 'content': 0.053260624408721924, 'timestamp': '2025-09-30 22:33:51.077293', 'step': 17518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:51.108555', 'step': 17518, 'epoch': 3} {'type': 'loss', 'content': 0.06066908687353134, 'timestamp': '2025-09-30 22:33:51.113807', 'step': 17519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.144588', 'step': 17519, 'epoch': 3} {'type': 'loss', 'content': 0.042775824666023254, 'timestamp': '2025-09-30 22:33:51.179950', 'step': 17520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:51.212328', 'step': 17520, 'epoch': 3} {'type': 'loss', 'content': 0.06935910135507584, 'timestamp': '2025-09-30 22:33:51.215670', 'step': 17521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.251341', 'step': 17521, 'epoch': 3} {'type': 'loss', 'content': 0.03066444955766201, 'timestamp': '2025-09-30 22:33:51.254039', 'step': 17522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:51.284586', 'step': 17522, 'epoch': 3} {'type': 'loss', 'content': 0.035575300455093384, 'timestamp': '2025-09-30 22:33:51.289747', 'step': 17523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.332062', 'step': 17523, 'epoch': 3} {'type': 'loss', 'content': 0.049992676824331284, 'timestamp': '2025-09-30 22:33:51.357975', 'step': 17524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:51.393615', 'step': 17524, 'epoch': 3} {'type': 'loss', 'content': 0.04469643905758858, 'timestamp': '2025-09-30 22:33:51.398119', 'step': 17525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:51.430147', 'step': 17525, 'epoch': 3} {'type': 'loss', 'content': 0.09073731303215027, 'timestamp': '2025-09-30 22:33:51.433946', 'step': 17526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:51.467365', 'step': 17526, 'epoch': 3} {'type': 'loss', 'content': 0.022562826052308083, 'timestamp': '2025-09-30 22:33:51.471224', 'step': 17527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:51.514600', 'step': 17527, 'epoch': 3} {'type': 'loss', 'content': 0.04502694681286812, 'timestamp': '2025-09-30 22:33:51.551956', 'step': 17528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.586753', 'step': 17528, 'epoch': 3} {'type': 'loss', 'content': 0.15879656374454498, 'timestamp': '2025-09-30 22:33:51.589055', 'step': 17529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:51.626318', 'step': 17529, 'epoch': 3} {'type': 'loss', 'content': 0.10418565571308136, 'timestamp': '2025-09-30 22:33:51.629443', 'step': 17530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.663031', 'step': 17530, 'epoch': 3} {'type': 'loss', 'content': 0.029480477795004845, 'timestamp': '2025-09-30 22:33:51.670502', 'step': 17531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.703964', 'step': 17531, 'epoch': 3} {'type': 'loss', 'content': 0.03450588881969452, 'timestamp': '2025-09-30 22:33:51.732032', 'step': 17532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:51.762044', 'step': 17532, 'epoch': 3} {'type': 'loss', 'content': 0.022461334243416786, 'timestamp': '2025-09-30 22:33:51.766675', 'step': 17533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:51.800334', 'step': 17533, 'epoch': 3} {'type': 'loss', 'content': 0.10230469703674316, 'timestamp': '2025-09-30 22:33:51.803853', 'step': 17534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:51.842985', 'step': 17534, 'epoch': 3} {'type': 'loss', 'content': 0.028316689655184746, 'timestamp': '2025-09-30 22:33:51.861899', 'step': 17535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:51.894068', 'step': 17535, 'epoch': 3} {'type': 'loss', 'content': 0.06175092235207558, 'timestamp': '2025-09-30 22:33:51.921122', 'step': 17536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:51.951955', 'step': 17536, 'epoch': 3} {'type': 'loss', 'content': 0.14989875257015228, 'timestamp': '2025-09-30 22:33:51.956817', 'step': 17537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.002680', 'step': 17537, 'epoch': 3} {'type': 'loss', 'content': 0.01759542152285576, 'timestamp': '2025-09-30 22:33:52.006188', 'step': 17538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:52.043265', 'step': 17538, 'epoch': 3} {'type': 'loss', 'content': 0.07758533954620361, 'timestamp': '2025-09-30 22:33:52.047801', 'step': 17539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:52.080660', 'step': 17539, 'epoch': 3} {'type': 'loss', 'content': 0.044087640941143036, 'timestamp': '2025-09-30 22:33:52.105030', 'step': 17540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:52.141254', 'step': 17540, 'epoch': 3} {'type': 'loss', 'content': 0.017367977648973465, 'timestamp': '2025-09-30 22:33:52.144457', 'step': 17541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.187732', 'step': 17541, 'epoch': 3} {'type': 'loss', 'content': 0.040422286838293076, 'timestamp': '2025-09-30 22:33:52.190989', 'step': 17542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:52.229010', 'step': 17542, 'epoch': 3} {'type': 'loss', 'content': 0.07080347836017609, 'timestamp': '2025-09-30 22:33:52.244692', 'step': 17543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.276116', 'step': 17543, 'epoch': 3} {'type': 'loss', 'content': 0.09508313983678818, 'timestamp': '2025-09-30 22:33:52.303496', 'step': 17544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:52.333719', 'step': 17544, 'epoch': 3} {'type': 'loss', 'content': 0.07073528319597244, 'timestamp': '2025-09-30 22:33:52.337158', 'step': 17545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:52.368483', 'step': 17545, 'epoch': 3} {'type': 'loss', 'content': 0.08543422818183899, 'timestamp': '2025-09-30 22:33:52.377390', 'step': 17546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.415274', 'step': 17546, 'epoch': 3} {'type': 'loss', 'content': 0.05720839649438858, 'timestamp': '2025-09-30 22:33:52.418907', 'step': 17547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:52.449538', 'step': 17547, 'epoch': 3} {'type': 'loss', 'content': 0.02490420453250408, 'timestamp': '2025-09-30 22:33:52.474864', 'step': 17548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:52.506206', 'step': 17548, 'epoch': 3} {'type': 'loss', 'content': 0.04776673763990402, 'timestamp': '2025-09-30 22:33:52.514494', 'step': 17549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.546317', 'step': 17549, 'epoch': 3} {'type': 'loss', 'content': 0.043940190225839615, 'timestamp': '2025-09-30 22:33:52.553319', 'step': 17550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:52.587662', 'step': 17550, 'epoch': 3} {'type': 'loss', 'content': 0.08156190067529678, 'timestamp': '2025-09-30 22:33:52.589932', 'step': 17551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.621347', 'step': 17551, 'epoch': 3} {'type': 'loss', 'content': 0.05714796483516693, 'timestamp': '2025-09-30 22:33:52.647620', 'step': 17552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:52.677591', 'step': 17552, 'epoch': 3} {'type': 'loss', 'content': 0.0440414622426033, 'timestamp': '2025-09-30 22:33:52.679651', 'step': 17553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.711081', 'step': 17553, 'epoch': 3} {'type': 'loss', 'content': 0.0921497792005539, 'timestamp': '2025-09-30 22:33:52.714239', 'step': 17554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:52.744698', 'step': 17554, 'epoch': 3} {'type': 'loss', 'content': 0.07075917720794678, 'timestamp': '2025-09-30 22:33:52.748701', 'step': 17555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:52.782000', 'step': 17555, 'epoch': 3} {'type': 'loss', 'content': 0.0615658238530159, 'timestamp': '2025-09-30 22:33:52.809207', 'step': 17556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:52.840556', 'step': 17556, 'epoch': 3} {'type': 'loss', 'content': 0.09600851684808731, 'timestamp': '2025-09-30 22:33:52.845721', 'step': 17557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.877165', 'step': 17557, 'epoch': 3} {'type': 'loss', 'content': 0.03568686544895172, 'timestamp': '2025-09-30 22:33:52.888118', 'step': 17558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:52.930538', 'step': 17558, 'epoch': 3} {'type': 'loss', 'content': 0.06745410710573196, 'timestamp': '2025-09-30 22:33:52.933797', 'step': 17559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:52.969183', 'step': 17559, 'epoch': 3} {'type': 'loss', 'content': 0.06837750971317291, 'timestamp': '2025-09-30 22:33:53.000915', 'step': 17560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.031938', 'step': 17560, 'epoch': 3} {'type': 'loss', 'content': 0.10160134732723236, 'timestamp': '2025-09-30 22:33:53.034456', 'step': 17561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.064837', 'step': 17561, 'epoch': 3} {'type': 'loss', 'content': 0.07403579354286194, 'timestamp': '2025-09-30 22:33:53.068870', 'step': 17562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:53.118673', 'step': 17562, 'epoch': 3} {'type': 'loss', 'content': 0.034132085740566254, 'timestamp': '2025-09-30 22:33:53.122900', 'step': 17563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:53.157337', 'step': 17563, 'epoch': 3} {'type': 'loss', 'content': 0.02550310641527176, 'timestamp': '2025-09-30 22:33:53.182789', 'step': 17564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:53.214521', 'step': 17564, 'epoch': 3} {'type': 'loss', 'content': 0.053696904331445694, 'timestamp': '2025-09-30 22:33:53.218535', 'step': 17565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:53.257691', 'step': 17565, 'epoch': 3} {'type': 'loss', 'content': 0.07779981940984726, 'timestamp': '2025-09-30 22:33:53.261506', 'step': 17566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:53.296723', 'step': 17566, 'epoch': 3} {'type': 'loss', 'content': 0.07085632532835007, 'timestamp': '2025-09-30 22:33:53.311160', 'step': 17567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:53.343332', 'step': 17567, 'epoch': 3} {'type': 'loss', 'content': 0.00724624190479517, 'timestamp': '2025-09-30 22:33:53.370282', 'step': 17568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.404357', 'step': 17568, 'epoch': 3} {'type': 'loss', 'content': 0.031205231323838234, 'timestamp': '2025-09-30 22:33:53.409251', 'step': 17569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.445085', 'step': 17569, 'epoch': 3} {'type': 'loss', 'content': 0.06416895985603333, 'timestamp': '2025-09-30 22:33:53.458582', 'step': 17570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.491298', 'step': 17570, 'epoch': 3} {'type': 'loss', 'content': 0.07164518535137177, 'timestamp': '2025-09-30 22:33:53.496243', 'step': 17571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:53.535860', 'step': 17571, 'epoch': 3} {'type': 'loss', 'content': 0.014243117533624172, 'timestamp': '2025-09-30 22:33:53.565806', 'step': 17572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:53.614332', 'step': 17572, 'epoch': 3} {'type': 'loss', 'content': 0.09719596803188324, 'timestamp': '2025-09-30 22:33:53.619387', 'step': 17573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.653002', 'step': 17573, 'epoch': 3} {'type': 'loss', 'content': 0.006394548341631889, 'timestamp': '2025-09-30 22:33:53.670495', 'step': 17574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:53.714663', 'step': 17574, 'epoch': 3} {'type': 'loss', 'content': 0.058465391397476196, 'timestamp': '2025-09-30 22:33:53.717961', 'step': 17575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.752711', 'step': 17575, 'epoch': 3} {'type': 'loss', 'content': 0.0486452691257, 'timestamp': '2025-09-30 22:33:53.777497', 'step': 17576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:33:53.810046', 'step': 17576, 'epoch': 3} {'type': 'loss', 'content': 0.03746068477630615, 'timestamp': '2025-09-30 22:33:53.815813', 'step': 17577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:53.865363', 'step': 17577, 'epoch': 3} {'type': 'loss', 'content': 0.03424718230962753, 'timestamp': '2025-09-30 22:33:53.870013', 'step': 17578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:53.914698', 'step': 17578, 'epoch': 3} {'type': 'loss', 'content': 0.010825921781361103, 'timestamp': '2025-09-30 22:33:53.928765', 'step': 17579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:53.969652', 'step': 17579, 'epoch': 3} {'type': 'loss', 'content': 0.11067056655883789, 'timestamp': '2025-09-30 22:33:54.010962', 'step': 17580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:54.044272', 'step': 17580, 'epoch': 3} {'type': 'loss', 'content': 0.10850820690393448, 'timestamp': '2025-09-30 22:33:54.049422', 'step': 17581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.095370', 'step': 17581, 'epoch': 3} {'type': 'loss', 'content': 0.02331368997693062, 'timestamp': '2025-09-30 22:33:54.108004', 'step': 17582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:54.140999', 'step': 17582, 'epoch': 3} {'type': 'loss', 'content': 0.06889605522155762, 'timestamp': '2025-09-30 22:33:54.152759', 'step': 17583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.192901', 'step': 17583, 'epoch': 3} {'type': 'loss', 'content': 0.08101262897253036, 'timestamp': '2025-09-30 22:33:54.220342', 'step': 17584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.263268', 'step': 17584, 'epoch': 3} {'type': 'loss', 'content': 0.0668705552816391, 'timestamp': '2025-09-30 22:33:54.266786', 'step': 17585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.297776', 'step': 17585, 'epoch': 3} {'type': 'loss', 'content': 0.04406386986374855, 'timestamp': '2025-09-30 22:33:54.301593', 'step': 17586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:54.333007', 'step': 17586, 'epoch': 3} {'type': 'loss', 'content': 0.0957103744149208, 'timestamp': '2025-09-30 22:33:54.335822', 'step': 17587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:54.371445', 'step': 17587, 'epoch': 3} {'type': 'loss', 'content': 0.04344535991549492, 'timestamp': '2025-09-30 22:33:54.395510', 'step': 17588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:54.439758', 'step': 17588, 'epoch': 3} {'type': 'loss', 'content': 0.06274617463350296, 'timestamp': '2025-09-30 22:33:54.449476', 'step': 17589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:54.487994', 'step': 17589, 'epoch': 3} {'type': 'loss', 'content': 0.04789087921380997, 'timestamp': '2025-09-30 22:33:54.506368', 'step': 17590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:54.541920', 'step': 17590, 'epoch': 3} {'type': 'loss', 'content': 0.05742676183581352, 'timestamp': '2025-09-30 22:33:54.545236', 'step': 17591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.580619', 'step': 17591, 'epoch': 3} {'type': 'loss', 'content': 0.05257010459899902, 'timestamp': '2025-09-30 22:33:54.605347', 'step': 17592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:54.643222', 'step': 17592, 'epoch': 3} {'type': 'loss', 'content': 0.06329512596130371, 'timestamp': '2025-09-30 22:33:54.656594', 'step': 17593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:54.687995', 'step': 17593, 'epoch': 3} {'type': 'loss', 'content': 0.12357644736766815, 'timestamp': '2025-09-30 22:33:54.691086', 'step': 17594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.723444', 'step': 17594, 'epoch': 3} {'type': 'loss', 'content': 0.11779407411813736, 'timestamp': '2025-09-30 22:33:54.728318', 'step': 17595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:33:54.771053', 'step': 17595, 'epoch': 3} {'type': 'loss', 'content': 0.01946246437728405, 'timestamp': '2025-09-30 22:33:54.796662', 'step': 17596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:54.831466', 'step': 17596, 'epoch': 3} {'type': 'loss', 'content': 0.05528274551033974, 'timestamp': '2025-09-30 22:33:54.835727', 'step': 17597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:54.868051', 'step': 17597, 'epoch': 3} {'type': 'loss', 'content': 0.046467892825603485, 'timestamp': '2025-09-30 22:33:54.872240', 'step': 17598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:54.904207', 'step': 17598, 'epoch': 3} {'type': 'loss', 'content': 0.045477546751499176, 'timestamp': '2025-09-30 22:33:54.907883', 'step': 17599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:54.939267', 'step': 17599, 'epoch': 3} {'type': 'loss', 'content': 0.1030397042632103, 'timestamp': '2025-09-30 22:33:54.964206', 'step': 17600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:54.995357', 'step': 17600, 'epoch': 3} {'type': 'loss', 'content': 0.012753837741911411, 'timestamp': '2025-09-30 22:33:55.000342', 'step': 17601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:55.033568', 'step': 17601, 'epoch': 3} {'type': 'loss', 'content': 0.05640406161546707, 'timestamp': '2025-09-30 22:33:55.041644', 'step': 17602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.074269', 'step': 17602, 'epoch': 3} {'type': 'loss', 'content': 0.07583887130022049, 'timestamp': '2025-09-30 22:33:55.077369', 'step': 17603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:55.108295', 'step': 17603, 'epoch': 3} {'type': 'loss', 'content': 0.045545585453510284, 'timestamp': '2025-09-30 22:33:55.132821', 'step': 17604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.168068', 'step': 17604, 'epoch': 3} {'type': 'loss', 'content': 0.039232369512319565, 'timestamp': '2025-09-30 22:33:55.181831', 'step': 17605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.219224', 'step': 17605, 'epoch': 3} {'type': 'loss', 'content': 0.04347171634435654, 'timestamp': '2025-09-30 22:33:55.232450', 'step': 17606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.276375', 'step': 17606, 'epoch': 3} {'type': 'loss', 'content': 0.0466575101017952, 'timestamp': '2025-09-30 22:33:55.280605', 'step': 17607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.312088', 'step': 17607, 'epoch': 3} {'type': 'loss', 'content': 0.051047541201114655, 'timestamp': '2025-09-30 22:33:55.338907', 'step': 17608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.380184', 'step': 17608, 'epoch': 3} {'type': 'loss', 'content': 0.04915846511721611, 'timestamp': '2025-09-30 22:33:55.384584', 'step': 17609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.421705', 'step': 17609, 'epoch': 3} {'type': 'loss', 'content': 0.09627825021743774, 'timestamp': '2025-09-30 22:33:55.431872', 'step': 17610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.467577', 'step': 17610, 'epoch': 3} {'type': 'loss', 'content': 0.06815485656261444, 'timestamp': '2025-09-30 22:33:55.471018', 'step': 17611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.508419', 'step': 17611, 'epoch': 3} {'type': 'loss', 'content': 0.07128144800662994, 'timestamp': '2025-09-30 22:33:55.545642', 'step': 17612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.577135', 'step': 17612, 'epoch': 3} {'type': 'loss', 'content': 0.018466681241989136, 'timestamp': '2025-09-30 22:33:55.581121', 'step': 17613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:55.612514', 'step': 17613, 'epoch': 3} {'type': 'loss', 'content': 0.0602385476231575, 'timestamp': '2025-09-30 22:33:55.617167', 'step': 17614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.650580', 'step': 17614, 'epoch': 3} {'type': 'loss', 'content': 0.05735626444220543, 'timestamp': '2025-09-30 22:33:55.656290', 'step': 17615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.691156', 'step': 17615, 'epoch': 3} {'type': 'loss', 'content': 0.04652910679578781, 'timestamp': '2025-09-30 22:33:55.716780', 'step': 17616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.747592', 'step': 17616, 'epoch': 3} {'type': 'loss', 'content': 0.04824534058570862, 'timestamp': '2025-09-30 22:33:55.751031', 'step': 17617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:55.782582', 'step': 17617, 'epoch': 3} {'type': 'loss', 'content': 0.06167776510119438, 'timestamp': '2025-09-30 22:33:55.796105', 'step': 17618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:55.838019', 'step': 17618, 'epoch': 3} {'type': 'loss', 'content': 0.09618299454450607, 'timestamp': '2025-09-30 22:33:55.845520', 'step': 17619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:55.891681', 'step': 17619, 'epoch': 3} {'type': 'loss', 'content': 0.054816775023937225, 'timestamp': '2025-09-30 22:33:55.920332', 'step': 17620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:55.950835', 'step': 17620, 'epoch': 3} {'type': 'loss', 'content': 0.015593372285366058, 'timestamp': '2025-09-30 22:33:55.953978', 'step': 17621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:55.984954', 'step': 17621, 'epoch': 3} {'type': 'loss', 'content': 0.03838716819882393, 'timestamp': '2025-09-30 22:33:55.989574', 'step': 17622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.021977', 'step': 17622, 'epoch': 3} {'type': 'loss', 'content': 0.09071678668260574, 'timestamp': '2025-09-30 22:33:56.025190', 'step': 17623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:56.062461', 'step': 17623, 'epoch': 3} {'type': 'loss', 'content': 0.08176452666521072, 'timestamp': '2025-09-30 22:33:56.087072', 'step': 17624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:56.119560', 'step': 17624, 'epoch': 3} {'type': 'loss', 'content': 0.13003620505332947, 'timestamp': '2025-09-30 22:33:56.122227', 'step': 17625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.153930', 'step': 17625, 'epoch': 3} {'type': 'loss', 'content': 0.05199446156620979, 'timestamp': '2025-09-30 22:33:56.157951', 'step': 17626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.194464', 'step': 17626, 'epoch': 3} {'type': 'loss', 'content': 0.07462988048791885, 'timestamp': '2025-09-30 22:33:56.198393', 'step': 17627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:56.234422', 'step': 17627, 'epoch': 3} {'type': 'loss', 'content': 0.04063897207379341, 'timestamp': '2025-09-30 22:33:56.259184', 'step': 17628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:56.305052', 'step': 17628, 'epoch': 3} {'type': 'loss', 'content': 0.06400422751903534, 'timestamp': '2025-09-30 22:33:56.310288', 'step': 17629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:56.358281', 'step': 17629, 'epoch': 3} {'type': 'loss', 'content': 0.15240776538848877, 'timestamp': '2025-09-30 22:33:56.365053', 'step': 17630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.405350', 'step': 17630, 'epoch': 3} {'type': 'loss', 'content': 0.07250375300645828, 'timestamp': '2025-09-30 22:33:56.408382', 'step': 17631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:56.452797', 'step': 17631, 'epoch': 3} {'type': 'loss', 'content': 0.13461396098136902, 'timestamp': '2025-09-30 22:33:56.479467', 'step': 17632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:56.515807', 'step': 17632, 'epoch': 3} {'type': 'loss', 'content': 0.13874436914920807, 'timestamp': '2025-09-30 22:33:56.525754', 'step': 17633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:56.561802', 'step': 17633, 'epoch': 3} {'type': 'loss', 'content': 0.027840353548526764, 'timestamp': '2025-09-30 22:33:56.577632', 'step': 17634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:56.620861', 'step': 17634, 'epoch': 3} {'type': 'loss', 'content': 0.08237360417842865, 'timestamp': '2025-09-30 22:33:56.625562', 'step': 17635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:56.657660', 'step': 17635, 'epoch': 3} {'type': 'loss', 'content': 0.07068300247192383, 'timestamp': '2025-09-30 22:33:56.683455', 'step': 17636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.718266', 'step': 17636, 'epoch': 3} {'type': 'loss', 'content': 0.10054390132427216, 'timestamp': '2025-09-30 22:33:56.723349', 'step': 17637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:56.755842', 'step': 17637, 'epoch': 3} {'type': 'loss', 'content': 0.0898689404129982, 'timestamp': '2025-09-30 22:33:56.770072', 'step': 17638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:56.806783', 'step': 17638, 'epoch': 3} {'type': 'loss', 'content': 0.039049431681632996, 'timestamp': '2025-09-30 22:33:56.814283', 'step': 17639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.848987', 'step': 17639, 'epoch': 3} {'type': 'loss', 'content': 0.20031294226646423, 'timestamp': '2025-09-30 22:33:56.873435', 'step': 17640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:56.906729', 'step': 17640, 'epoch': 3} {'type': 'loss', 'content': 0.0902235135436058, 'timestamp': '2025-09-30 22:33:56.910235', 'step': 17641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:56.941067', 'step': 17641, 'epoch': 3} {'type': 'loss', 'content': 0.06766042858362198, 'timestamp': '2025-09-30 22:33:56.951540', 'step': 17642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:56.987268', 'step': 17642, 'epoch': 3} {'type': 'loss', 'content': 0.06621047109365463, 'timestamp': '2025-09-30 22:33:56.991597', 'step': 17643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:57.028399', 'step': 17643, 'epoch': 3} {'type': 'loss', 'content': 0.056370269507169724, 'timestamp': '2025-09-30 22:33:57.054297', 'step': 17644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:57.095895', 'step': 17644, 'epoch': 3} {'type': 'loss', 'content': 0.06631869077682495, 'timestamp': '2025-09-30 22:33:57.100182', 'step': 17645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.132521', 'step': 17645, 'epoch': 3} {'type': 'loss', 'content': 0.09102345257997513, 'timestamp': '2025-09-30 22:33:57.149005', 'step': 17646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:57.184465', 'step': 17646, 'epoch': 3} {'type': 'loss', 'content': 0.08098199963569641, 'timestamp': '2025-09-30 22:33:57.187335', 'step': 17647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.222865', 'step': 17647, 'epoch': 3} {'type': 'loss', 'content': 0.05410943925380707, 'timestamp': '2025-09-30 22:33:57.248253', 'step': 17648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:57.280793', 'step': 17648, 'epoch': 3} {'type': 'loss', 'content': 0.053371455520391464, 'timestamp': '2025-09-30 22:33:57.284589', 'step': 17649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.319315', 'step': 17649, 'epoch': 3} {'type': 'loss', 'content': 0.05785447359085083, 'timestamp': '2025-09-30 22:33:57.323432', 'step': 17650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.357722', 'step': 17650, 'epoch': 3} {'type': 'loss', 'content': 0.18814614415168762, 'timestamp': '2025-09-30 22:33:57.363040', 'step': 17651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.403389', 'step': 17651, 'epoch': 3} {'type': 'loss', 'content': 0.10656473785638809, 'timestamp': '2025-09-30 22:33:57.428791', 'step': 17652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:57.466800', 'step': 17652, 'epoch': 3} {'type': 'loss', 'content': 0.09933988004922867, 'timestamp': '2025-09-30 22:33:57.478931', 'step': 17653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:57.518102', 'step': 17653, 'epoch': 3} {'type': 'loss', 'content': 0.07947784662246704, 'timestamp': '2025-09-30 22:33:57.522976', 'step': 17654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.564582', 'step': 17654, 'epoch': 3} {'type': 'loss', 'content': 0.16916796565055847, 'timestamp': '2025-09-30 22:33:57.568773', 'step': 17655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:57.617723', 'step': 17655, 'epoch': 3} {'type': 'loss', 'content': 0.08837935328483582, 'timestamp': '2025-09-30 22:33:57.645897', 'step': 17656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:57.683661', 'step': 17656, 'epoch': 3} {'type': 'loss', 'content': 0.027443943545222282, 'timestamp': '2025-09-30 22:33:57.686455', 'step': 17657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:57.720599', 'step': 17657, 'epoch': 3} {'type': 'loss', 'content': 0.09738048911094666, 'timestamp': '2025-09-30 22:33:57.724289', 'step': 17658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:57.759478', 'step': 17658, 'epoch': 3} {'type': 'loss', 'content': 0.06421583145856857, 'timestamp': '2025-09-30 22:33:57.763535', 'step': 17659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.794688', 'step': 17659, 'epoch': 3} {'type': 'loss', 'content': 0.1260296255350113, 'timestamp': '2025-09-30 22:33:57.819129', 'step': 17660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:57.858521', 'step': 17660, 'epoch': 3} {'type': 'loss', 'content': 0.11568096280097961, 'timestamp': '2025-09-30 22:33:57.861116', 'step': 17661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:57.892617', 'step': 17661, 'epoch': 3} {'type': 'loss', 'content': 0.03295797109603882, 'timestamp': '2025-09-30 22:33:57.904384', 'step': 17662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:57.934953', 'step': 17662, 'epoch': 3} {'type': 'loss', 'content': 0.057148151099681854, 'timestamp': '2025-09-30 22:33:57.944522', 'step': 17663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:57.982373', 'step': 17663, 'epoch': 3} {'type': 'loss', 'content': 0.047844938933849335, 'timestamp': '2025-09-30 22:33:58.018964', 'step': 17664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:58.052449', 'step': 17664, 'epoch': 3} {'type': 'loss', 'content': 0.08174046874046326, 'timestamp': '2025-09-30 22:33:58.056345', 'step': 17665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:58.090703', 'step': 17665, 'epoch': 3} {'type': 'loss', 'content': 0.06854166835546494, 'timestamp': '2025-09-30 22:33:58.094320', 'step': 17666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:58.133553', 'step': 17666, 'epoch': 3} {'type': 'loss', 'content': 0.05355602130293846, 'timestamp': '2025-09-30 22:33:58.137293', 'step': 17667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:58.180237', 'step': 17667, 'epoch': 3} {'type': 'loss', 'content': 0.05376158654689789, 'timestamp': '2025-09-30 22:33:58.215910', 'step': 17668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:33:58.257273', 'step': 17668, 'epoch': 3} {'type': 'loss', 'content': 0.08866693079471588, 'timestamp': '2025-09-30 22:33:58.260238', 'step': 17669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:58.291278', 'step': 17669, 'epoch': 3} {'type': 'loss', 'content': 0.07336152344942093, 'timestamp': '2025-09-30 22:33:58.301643', 'step': 17670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:58.344068', 'step': 17670, 'epoch': 3} {'type': 'loss', 'content': 0.038333240896463394, 'timestamp': '2025-09-30 22:33:58.352420', 'step': 17671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:58.390411', 'step': 17671, 'epoch': 3} {'type': 'loss', 'content': 0.04423365741968155, 'timestamp': '2025-09-30 22:33:58.415628', 'step': 17672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:58.457891', 'step': 17672, 'epoch': 3} {'type': 'loss', 'content': 0.019359668716788292, 'timestamp': '2025-09-30 22:33:58.463844', 'step': 17673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:58.502678', 'step': 17673, 'epoch': 3} {'type': 'loss', 'content': 0.15619215369224548, 'timestamp': '2025-09-30 22:33:58.510898', 'step': 17674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:58.561469', 'step': 17674, 'epoch': 3} {'type': 'loss', 'content': 0.16548661887645721, 'timestamp': '2025-09-30 22:33:58.574732', 'step': 17675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:58.610434', 'step': 17675, 'epoch': 3} {'type': 'loss', 'content': 0.05651991441845894, 'timestamp': '2025-09-30 22:33:58.638796', 'step': 17676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:58.678196', 'step': 17676, 'epoch': 3} {'type': 'loss', 'content': 0.0583188459277153, 'timestamp': '2025-09-30 22:33:58.689494', 'step': 17677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:58.721302', 'step': 17677, 'epoch': 3} {'type': 'loss', 'content': 0.103962741792202, 'timestamp': '2025-09-30 22:33:58.725118', 'step': 17678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:58.771350', 'step': 17678, 'epoch': 3} {'type': 'loss', 'content': 0.07991781085729599, 'timestamp': '2025-09-30 22:33:58.777241', 'step': 17679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:58.808005', 'step': 17679, 'epoch': 3} {'type': 'loss', 'content': 0.029076354578137398, 'timestamp': '2025-09-30 22:33:58.837988', 'step': 17680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:58.885845', 'step': 17680, 'epoch': 3} {'type': 'loss', 'content': 0.16370128095149994, 'timestamp': '2025-09-30 22:33:58.889771', 'step': 17681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:58.921335', 'step': 17681, 'epoch': 3} {'type': 'loss', 'content': 0.05617336183786392, 'timestamp': '2025-09-30 22:33:58.929986', 'step': 17682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:33:58.966272', 'step': 17682, 'epoch': 3} {'type': 'loss', 'content': 0.020434794947504997, 'timestamp': '2025-09-30 22:33:58.973293', 'step': 17683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:59.007540', 'step': 17683, 'epoch': 3} {'type': 'loss', 'content': 0.08443742990493774, 'timestamp': '2025-09-30 22:33:59.033378', 'step': 17684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.075290', 'step': 17684, 'epoch': 3} {'type': 'loss', 'content': 0.07838445901870728, 'timestamp': '2025-09-30 22:33:59.078596', 'step': 17685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:59.122820', 'step': 17685, 'epoch': 3} {'type': 'loss', 'content': 0.09177515655755997, 'timestamp': '2025-09-30 22:33:59.132233', 'step': 17686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:59.162998', 'step': 17686, 'epoch': 3} {'type': 'loss', 'content': 0.07344293594360352, 'timestamp': '2025-09-30 22:33:59.166565', 'step': 17687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.208291', 'step': 17687, 'epoch': 3} {'type': 'loss', 'content': 0.02100238762795925, 'timestamp': '2025-09-30 22:33:59.233253', 'step': 17688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.266280', 'step': 17688, 'epoch': 3} {'type': 'loss', 'content': 0.09005779027938843, 'timestamp': '2025-09-30 22:33:59.274315', 'step': 17689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:59.309593', 'step': 17689, 'epoch': 3} {'type': 'loss', 'content': 0.13188673555850983, 'timestamp': '2025-09-30 22:33:59.314448', 'step': 17690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.346864', 'step': 17690, 'epoch': 3} {'type': 'loss', 'content': 0.06311860680580139, 'timestamp': '2025-09-30 22:33:59.362391', 'step': 17691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.395817', 'step': 17691, 'epoch': 3} {'type': 'loss', 'content': 0.17011375725269318, 'timestamp': '2025-09-30 22:33:59.420974', 'step': 17692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:33:59.453802', 'step': 17692, 'epoch': 3} {'type': 'loss', 'content': 0.028429627418518066, 'timestamp': '2025-09-30 22:33:59.458026', 'step': 17693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:59.493501', 'step': 17693, 'epoch': 3} {'type': 'loss', 'content': 0.13258379697799683, 'timestamp': '2025-09-30 22:33:59.506406', 'step': 17694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:33:59.539812', 'step': 17694, 'epoch': 3} {'type': 'loss', 'content': 0.10793104022741318, 'timestamp': '2025-09-30 22:33:59.548782', 'step': 17695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:59.589796', 'step': 17695, 'epoch': 3} {'type': 'loss', 'content': 0.049351342022418976, 'timestamp': '2025-09-30 22:33:59.621419', 'step': 17696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:33:59.662116', 'step': 17696, 'epoch': 3} {'type': 'loss', 'content': 0.11804831027984619, 'timestamp': '2025-09-30 22:33:59.665817', 'step': 17697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.700584', 'step': 17697, 'epoch': 3} {'type': 'loss', 'content': 0.08198095113039017, 'timestamp': '2025-09-30 22:33:59.705287', 'step': 17698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.746787', 'step': 17698, 'epoch': 3} {'type': 'loss', 'content': 0.07811129093170166, 'timestamp': '2025-09-30 22:33:59.754754', 'step': 17699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:33:59.788446', 'step': 17699, 'epoch': 3} {'type': 'loss', 'content': 0.09359488636255264, 'timestamp': '2025-09-30 22:33:59.829217', 'step': 17700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:59.869316', 'step': 17700, 'epoch': 3} {'type': 'loss', 'content': 0.046234942972660065, 'timestamp': '2025-09-30 22:33:59.882702', 'step': 17701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:59.913324', 'step': 17701, 'epoch': 3} {'type': 'loss', 'content': 0.10976705700159073, 'timestamp': '2025-09-30 22:33:59.929877', 'step': 17702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:33:59.965861', 'step': 17702, 'epoch': 3} {'type': 'loss', 'content': 0.1008397564291954, 'timestamp': '2025-09-30 22:33:59.978501', 'step': 17703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:00.010328', 'step': 17703, 'epoch': 3} {'type': 'loss', 'content': 0.09013546258211136, 'timestamp': '2025-09-30 22:34:00.037364', 'step': 17704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:00.082209', 'step': 17704, 'epoch': 3} {'type': 'loss', 'content': 0.025939039885997772, 'timestamp': '2025-09-30 22:34:00.085437', 'step': 17705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:00.116631', 'step': 17705, 'epoch': 3} {'type': 'loss', 'content': 0.12163046002388, 'timestamp': '2025-09-30 22:34:00.120000', 'step': 17706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:00.153553', 'step': 17706, 'epoch': 3} {'type': 'loss', 'content': 0.0813482403755188, 'timestamp': '2025-09-30 22:34:00.158187', 'step': 17707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:00.195596', 'step': 17707, 'epoch': 3} {'type': 'loss', 'content': 0.060522958636283875, 'timestamp': '2025-09-30 22:34:00.222756', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:34:08.472484', 'step': 17708, 'epoch': 3} {'type': 'pplx', 'content': 12484.888937878753, 'timestamp': '2025-09-30 22:34:08.476907', 'step': 17708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:08.508527', 'step': 17708, 'epoch': 3} {'type': 'loss', 'content': 0.039828844368457794, 'timestamp': '2025-09-30 22:34:08.511424', 'step': 17709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:08.555946', 'step': 17709, 'epoch': 3} {'type': 'loss', 'content': 0.08730341494083405, 'timestamp': '2025-09-30 22:34:08.571902', 'step': 17710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:08.604789', 'step': 17710, 'epoch': 3} {'type': 'loss', 'content': 0.052337389439344406, 'timestamp': '2025-09-30 22:34:08.621378', 'step': 17711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:08.671216', 'step': 17711, 'epoch': 3} {'type': 'loss', 'content': 0.1744084656238556, 'timestamp': '2025-09-30 22:34:08.699173', 'step': 17712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:08.747911', 'step': 17712, 'epoch': 3} {'type': 'loss', 'content': 0.07732123881578445, 'timestamp': '2025-09-30 22:34:08.752759', 'step': 17713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:08.785770', 'step': 17713, 'epoch': 3} {'type': 'loss', 'content': 0.09662364423274994, 'timestamp': '2025-09-30 22:34:08.790135', 'step': 17714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:08.822670', 'step': 17714, 'epoch': 3} {'type': 'loss', 'content': 0.07212714105844498, 'timestamp': '2025-09-30 22:34:08.836028', 'step': 17715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:08.868400', 'step': 17715, 'epoch': 3} {'type': 'loss', 'content': 0.04847203940153122, 'timestamp': '2025-09-30 22:34:08.893919', 'step': 17716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:08.930619', 'step': 17716, 'epoch': 3} {'type': 'loss', 'content': 0.06537832319736481, 'timestamp': '2025-09-30 22:34:08.937111', 'step': 17717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:08.971232', 'step': 17717, 'epoch': 3} {'type': 'loss', 'content': 0.0921296626329422, 'timestamp': '2025-09-30 22:34:08.975644', 'step': 17718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:09.006362', 'step': 17718, 'epoch': 3} {'type': 'loss', 'content': 0.015248654410243034, 'timestamp': '2025-09-30 22:34:09.011584', 'step': 17719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.044060', 'step': 17719, 'epoch': 3} {'type': 'loss', 'content': 0.08964815735816956, 'timestamp': '2025-09-30 22:34:09.068337', 'step': 17720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:09.101111', 'step': 17720, 'epoch': 3} {'type': 'loss', 'content': 0.027287881821393967, 'timestamp': '2025-09-30 22:34:09.103369', 'step': 17721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.139335', 'step': 17721, 'epoch': 3} {'type': 'loss', 'content': 0.08199043571949005, 'timestamp': '2025-09-30 22:34:09.147045', 'step': 17722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:09.185400', 'step': 17722, 'epoch': 3} {'type': 'loss', 'content': 0.04683037847280502, 'timestamp': '2025-09-30 22:34:09.189236', 'step': 17723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.221906', 'step': 17723, 'epoch': 3} {'type': 'loss', 'content': 0.09354493767023087, 'timestamp': '2025-09-30 22:34:09.251761', 'step': 17724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:09.287638', 'step': 17724, 'epoch': 3} {'type': 'loss', 'content': 0.07608582824468613, 'timestamp': '2025-09-30 22:34:09.292481', 'step': 17725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.326457', 'step': 17725, 'epoch': 3} {'type': 'loss', 'content': 0.03326207026839256, 'timestamp': '2025-09-30 22:34:09.331441', 'step': 17726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:09.370299', 'step': 17726, 'epoch': 3} {'type': 'loss', 'content': 0.157374307513237, 'timestamp': '2025-09-30 22:34:09.384404', 'step': 17727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.419666', 'step': 17727, 'epoch': 3} {'type': 'loss', 'content': 0.08393856137990952, 'timestamp': '2025-09-30 22:34:09.444190', 'step': 17728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:09.474039', 'step': 17728, 'epoch': 3} {'type': 'loss', 'content': 0.07395950704813004, 'timestamp': '2025-09-30 22:34:09.477929', 'step': 17729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:09.513240', 'step': 17729, 'epoch': 3} {'type': 'loss', 'content': 0.1239096000790596, 'timestamp': '2025-09-30 22:34:09.517319', 'step': 17730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:09.556161', 'step': 17730, 'epoch': 3} {'type': 'loss', 'content': 0.04579351842403412, 'timestamp': '2025-09-30 22:34:09.561123', 'step': 17731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.594149', 'step': 17731, 'epoch': 3} {'type': 'loss', 'content': 0.01320019830018282, 'timestamp': '2025-09-30 22:34:09.619583', 'step': 17732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:09.650060', 'step': 17732, 'epoch': 3} {'type': 'loss', 'content': 0.05650259926915169, 'timestamp': '2025-09-30 22:34:09.654329', 'step': 17733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:09.692044', 'step': 17733, 'epoch': 3} {'type': 'loss', 'content': 0.05287764593958855, 'timestamp': '2025-09-30 22:34:09.695061', 'step': 17734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:09.725823', 'step': 17734, 'epoch': 3} {'type': 'loss', 'content': 0.03165121749043465, 'timestamp': '2025-09-30 22:34:09.728594', 'step': 17735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.769493', 'step': 17735, 'epoch': 3} {'type': 'loss', 'content': 0.07440698146820068, 'timestamp': '2025-09-30 22:34:09.797756', 'step': 17736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:09.833683', 'step': 17736, 'epoch': 3} {'type': 'loss', 'content': 0.04080100730061531, 'timestamp': '2025-09-30 22:34:09.838842', 'step': 17737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:09.872143', 'step': 17737, 'epoch': 3} {'type': 'loss', 'content': 0.03166467696428299, 'timestamp': '2025-09-30 22:34:09.874833', 'step': 17738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:09.910035', 'step': 17738, 'epoch': 3} {'type': 'loss', 'content': 0.16628536581993103, 'timestamp': '2025-09-30 22:34:09.913247', 'step': 17739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:09.942681', 'step': 17739, 'epoch': 3} {'type': 'loss', 'content': 0.1064072921872139, 'timestamp': '2025-09-30 22:34:09.966545', 'step': 17740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.001109', 'step': 17740, 'epoch': 3} {'type': 'loss', 'content': 0.06895811855792999, 'timestamp': '2025-09-30 22:34:10.009937', 'step': 17741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.042184', 'step': 17741, 'epoch': 3} {'type': 'loss', 'content': 0.09678807109594345, 'timestamp': '2025-09-30 22:34:10.044675', 'step': 17742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:10.075265', 'step': 17742, 'epoch': 3} {'type': 'loss', 'content': 0.02655898965895176, 'timestamp': '2025-09-30 22:34:10.086430', 'step': 17743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.120108', 'step': 17743, 'epoch': 3} {'type': 'loss', 'content': 0.041322171688079834, 'timestamp': '2025-09-30 22:34:10.145207', 'step': 17744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:10.175679', 'step': 17744, 'epoch': 3} {'type': 'loss', 'content': 0.09833631664514542, 'timestamp': '2025-09-30 22:34:10.178697', 'step': 17745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:10.209859', 'step': 17745, 'epoch': 3} {'type': 'loss', 'content': 0.05182022973895073, 'timestamp': '2025-09-30 22:34:10.214108', 'step': 17746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.245038', 'step': 17746, 'epoch': 3} {'type': 'loss', 'content': 0.05874321982264519, 'timestamp': '2025-09-30 22:34:10.249474', 'step': 17747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:10.281148', 'step': 17747, 'epoch': 3} {'type': 'loss', 'content': 0.051214780658483505, 'timestamp': '2025-09-30 22:34:10.313713', 'step': 17748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.346112', 'step': 17748, 'epoch': 3} {'type': 'loss', 'content': 0.01772494986653328, 'timestamp': '2025-09-30 22:34:10.362111', 'step': 17749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.411324', 'step': 17749, 'epoch': 3} {'type': 'loss', 'content': 0.09566105157136917, 'timestamp': '2025-09-30 22:34:10.414965', 'step': 17750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.448219', 'step': 17750, 'epoch': 3} {'type': 'loss', 'content': 0.08601807802915573, 'timestamp': '2025-09-30 22:34:10.451703', 'step': 17751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.483816', 'step': 17751, 'epoch': 3} {'type': 'loss', 'content': 0.046571146696805954, 'timestamp': '2025-09-30 22:34:10.508832', 'step': 17752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.539548', 'step': 17752, 'epoch': 3} {'type': 'loss', 'content': 0.027299022302031517, 'timestamp': '2025-09-30 22:34:10.551048', 'step': 17753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:10.593487', 'step': 17753, 'epoch': 3} {'type': 'loss', 'content': 0.04372647404670715, 'timestamp': '2025-09-30 22:34:10.599731', 'step': 17754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:10.635960', 'step': 17754, 'epoch': 3} {'type': 'loss', 'content': 0.03058810904622078, 'timestamp': '2025-09-30 22:34:10.639652', 'step': 17755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:10.673723', 'step': 17755, 'epoch': 3} {'type': 'loss', 'content': 0.11753018200397491, 'timestamp': '2025-09-30 22:34:10.698386', 'step': 17756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:10.737534', 'step': 17756, 'epoch': 3} {'type': 'loss', 'content': 0.09362863749265671, 'timestamp': '2025-09-30 22:34:10.744349', 'step': 17757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:10.778114', 'step': 17757, 'epoch': 3} {'type': 'loss', 'content': 0.09577171504497528, 'timestamp': '2025-09-30 22:34:10.780532', 'step': 17758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.817882', 'step': 17758, 'epoch': 3} {'type': 'loss', 'content': 0.03378569334745407, 'timestamp': '2025-09-30 22:34:10.820116', 'step': 17759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.852928', 'step': 17759, 'epoch': 3} {'type': 'loss', 'content': 0.09775178134441376, 'timestamp': '2025-09-30 22:34:10.877959', 'step': 17760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:10.920598', 'step': 17760, 'epoch': 3} {'type': 'loss', 'content': 0.09867703914642334, 'timestamp': '2025-09-30 22:34:10.923604', 'step': 17761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:10.958361', 'step': 17761, 'epoch': 3} {'type': 'loss', 'content': 0.06241406127810478, 'timestamp': '2025-09-30 22:34:10.962695', 'step': 17762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:10.994658', 'step': 17762, 'epoch': 3} {'type': 'loss', 'content': 0.05057808756828308, 'timestamp': '2025-09-30 22:34:10.997821', 'step': 17763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:11.029759', 'step': 17763, 'epoch': 3} {'type': 'loss', 'content': 0.09337752312421799, 'timestamp': '2025-09-30 22:34:11.057027', 'step': 17764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.092554', 'step': 17764, 'epoch': 3} {'type': 'loss', 'content': 0.042494457215070724, 'timestamp': '2025-09-30 22:34:11.095192', 'step': 17765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.125668', 'step': 17765, 'epoch': 3} {'type': 'loss', 'content': 0.10627463459968567, 'timestamp': '2025-09-30 22:34:11.128822', 'step': 17766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.173062', 'step': 17766, 'epoch': 3} {'type': 'loss', 'content': 0.06688347458839417, 'timestamp': '2025-09-30 22:34:11.182345', 'step': 17767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:11.215341', 'step': 17767, 'epoch': 3} {'type': 'loss', 'content': 0.05507524311542511, 'timestamp': '2025-09-30 22:34:11.247735', 'step': 17768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.288881', 'step': 17768, 'epoch': 3} {'type': 'loss', 'content': 0.05215377360582352, 'timestamp': '2025-09-30 22:34:11.291308', 'step': 17769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.326469', 'step': 17769, 'epoch': 3} {'type': 'loss', 'content': 0.04645084962248802, 'timestamp': '2025-09-30 22:34:11.339365', 'step': 17770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.373448', 'step': 17770, 'epoch': 3} {'type': 'loss', 'content': 0.061875853687524796, 'timestamp': '2025-09-30 22:34:11.377712', 'step': 17771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:11.411016', 'step': 17771, 'epoch': 3} {'type': 'loss', 'content': 0.06969134509563446, 'timestamp': '2025-09-30 22:34:11.436544', 'step': 17772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:11.475674', 'step': 17772, 'epoch': 3} {'type': 'loss', 'content': 0.04448995366692543, 'timestamp': '2025-09-30 22:34:11.478719', 'step': 17773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:11.518401', 'step': 17773, 'epoch': 3} {'type': 'loss', 'content': 0.05050383508205414, 'timestamp': '2025-09-30 22:34:11.528114', 'step': 17774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:11.566533', 'step': 17774, 'epoch': 3} {'type': 'loss', 'content': 0.05742133408784866, 'timestamp': '2025-09-30 22:34:11.571362', 'step': 17775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.603622', 'step': 17775, 'epoch': 3} {'type': 'loss', 'content': 0.039362240582704544, 'timestamp': '2025-09-30 22:34:11.629231', 'step': 17776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:11.676389', 'step': 17776, 'epoch': 3} {'type': 'loss', 'content': 0.0677003338932991, 'timestamp': '2025-09-30 22:34:11.679396', 'step': 17777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:11.713538', 'step': 17777, 'epoch': 3} {'type': 'loss', 'content': 0.042674582451581955, 'timestamp': '2025-09-30 22:34:11.718929', 'step': 17778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:11.763391', 'step': 17778, 'epoch': 3} {'type': 'loss', 'content': 0.09677909314632416, 'timestamp': '2025-09-30 22:34:11.783263', 'step': 17779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.829673', 'step': 17779, 'epoch': 3} {'type': 'loss', 'content': 0.0941929742693901, 'timestamp': '2025-09-30 22:34:11.860157', 'step': 17780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.904103', 'step': 17780, 'epoch': 3} {'type': 'loss', 'content': 0.0495026633143425, 'timestamp': '2025-09-30 22:34:11.909195', 'step': 17781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:11.941677', 'step': 17781, 'epoch': 3} {'type': 'loss', 'content': 0.08308340609073639, 'timestamp': '2025-09-30 22:34:11.945612', 'step': 17782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:11.987536', 'step': 17782, 'epoch': 3} {'type': 'loss', 'content': 0.018087653443217278, 'timestamp': '2025-09-30 22:34:11.998829', 'step': 17783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:12.044458', 'step': 17783, 'epoch': 3} {'type': 'loss', 'content': 0.04784700646996498, 'timestamp': '2025-09-30 22:34:12.069398', 'step': 17784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:12.102635', 'step': 17784, 'epoch': 3} {'type': 'loss', 'content': 0.06659980118274689, 'timestamp': '2025-09-30 22:34:12.107066', 'step': 17785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:12.151684', 'step': 17785, 'epoch': 3} {'type': 'loss', 'content': 0.041830822825431824, 'timestamp': '2025-09-30 22:34:12.155473', 'step': 17786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:12.190517', 'step': 17786, 'epoch': 3} {'type': 'loss', 'content': 0.04416421055793762, 'timestamp': '2025-09-30 22:34:12.193230', 'step': 17787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:12.224933', 'step': 17787, 'epoch': 3} {'type': 'loss', 'content': 0.043476708233356476, 'timestamp': '2025-09-30 22:34:12.251392', 'step': 17788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:12.282865', 'step': 17788, 'epoch': 3} {'type': 'loss', 'content': 0.10924843698740005, 'timestamp': '2025-09-30 22:34:12.298924', 'step': 17789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:12.337451', 'step': 17789, 'epoch': 3} {'type': 'loss', 'content': 0.051211606711149216, 'timestamp': '2025-09-30 22:34:12.346405', 'step': 17790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:12.382242', 'step': 17790, 'epoch': 3} {'type': 'loss', 'content': 0.054836489260196686, 'timestamp': '2025-09-30 22:34:12.387137', 'step': 17791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:12.437540', 'step': 17791, 'epoch': 3} {'type': 'loss', 'content': 0.024806680157780647, 'timestamp': '2025-09-30 22:34:12.463880', 'step': 17792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:12.495349', 'step': 17792, 'epoch': 3} {'type': 'loss', 'content': 0.11794580519199371, 'timestamp': '2025-09-30 22:34:12.499706', 'step': 17793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:12.532012', 'step': 17793, 'epoch': 3} {'type': 'loss', 'content': 0.06274230778217316, 'timestamp': '2025-09-30 22:34:12.540657', 'step': 17794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:12.577700', 'step': 17794, 'epoch': 3} {'type': 'loss', 'content': 0.026664970442652702, 'timestamp': '2025-09-30 22:34:12.591818', 'step': 17795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:12.622826', 'step': 17795, 'epoch': 3} {'type': 'loss', 'content': 0.06796737760305405, 'timestamp': '2025-09-30 22:34:12.647483', 'step': 17796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:12.678413', 'step': 17796, 'epoch': 3} {'type': 'loss', 'content': 0.13707920908927917, 'timestamp': '2025-09-30 22:34:12.680544', 'step': 17797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:12.712116', 'step': 17797, 'epoch': 3} {'type': 'loss', 'content': 0.11843977868556976, 'timestamp': '2025-09-30 22:34:12.716091', 'step': 17798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:12.749493', 'step': 17798, 'epoch': 3} {'type': 'loss', 'content': 0.05348449945449829, 'timestamp': '2025-09-30 22:34:12.764511', 'step': 17799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:12.797543', 'step': 17799, 'epoch': 3} {'type': 'loss', 'content': 0.09981279820203781, 'timestamp': '2025-09-30 22:34:12.828994', 'step': 17800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:12.866149', 'step': 17800, 'epoch': 3} {'type': 'loss', 'content': 0.06217331066727638, 'timestamp': '2025-09-30 22:34:12.869302', 'step': 17801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:12.905304', 'step': 17801, 'epoch': 3} {'type': 'loss', 'content': 0.13217729330062866, 'timestamp': '2025-09-30 22:34:12.917998', 'step': 17802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:12.959595', 'step': 17802, 'epoch': 3} {'type': 'loss', 'content': 0.08046125620603561, 'timestamp': '2025-09-30 22:34:12.968515', 'step': 17803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:13.003557', 'step': 17803, 'epoch': 3} {'type': 'loss', 'content': 0.10205206274986267, 'timestamp': '2025-09-30 22:34:13.037694', 'step': 17804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:13.079028', 'step': 17804, 'epoch': 3} {'type': 'loss', 'content': 0.019731005653738976, 'timestamp': '2025-09-30 22:34:13.086086', 'step': 17805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.126459', 'step': 17805, 'epoch': 3} {'type': 'loss', 'content': 0.023133937269449234, 'timestamp': '2025-09-30 22:34:13.131407', 'step': 17806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.163827', 'step': 17806, 'epoch': 3} {'type': 'loss', 'content': 0.043956365436315536, 'timestamp': '2025-09-30 22:34:13.167439', 'step': 17807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:13.199857', 'step': 17807, 'epoch': 3} {'type': 'loss', 'content': 0.056407418102025986, 'timestamp': '2025-09-30 22:34:13.225415', 'step': 17808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:13.259293', 'step': 17808, 'epoch': 3} {'type': 'loss', 'content': 0.06555109471082687, 'timestamp': '2025-09-30 22:34:13.275228', 'step': 17809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:13.307111', 'step': 17809, 'epoch': 3} {'type': 'loss', 'content': 0.036713726818561554, 'timestamp': '2025-09-30 22:34:13.312489', 'step': 17810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:13.346631', 'step': 17810, 'epoch': 3} {'type': 'loss', 'content': 0.05742788314819336, 'timestamp': '2025-09-30 22:34:13.350979', 'step': 17811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:13.384305', 'step': 17811, 'epoch': 3} {'type': 'loss', 'content': 0.05422384291887283, 'timestamp': '2025-09-30 22:34:13.409727', 'step': 17812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:13.442512', 'step': 17812, 'epoch': 3} {'type': 'loss', 'content': 0.06237008795142174, 'timestamp': '2025-09-30 22:34:13.448354', 'step': 17813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.489788', 'step': 17813, 'epoch': 3} {'type': 'loss', 'content': 0.10672292858362198, 'timestamp': '2025-09-30 22:34:13.494829', 'step': 17814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.528172', 'step': 17814, 'epoch': 3} {'type': 'loss', 'content': 0.05186809226870537, 'timestamp': '2025-09-30 22:34:13.540010', 'step': 17815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:13.575048', 'step': 17815, 'epoch': 3} {'type': 'loss', 'content': 0.11612904071807861, 'timestamp': '2025-09-30 22:34:13.600620', 'step': 17816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.636401', 'step': 17816, 'epoch': 3} {'type': 'loss', 'content': 0.03739830479025841, 'timestamp': '2025-09-30 22:34:13.644494', 'step': 17817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.681268', 'step': 17817, 'epoch': 3} {'type': 'loss', 'content': 0.08544287085533142, 'timestamp': '2025-09-30 22:34:13.687681', 'step': 17818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.720779', 'step': 17818, 'epoch': 3} {'type': 'loss', 'content': 0.14277607202529907, 'timestamp': '2025-09-30 22:34:13.726306', 'step': 17819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:13.761277', 'step': 17819, 'epoch': 3} {'type': 'loss', 'content': 0.05351477488875389, 'timestamp': '2025-09-30 22:34:13.789707', 'step': 17820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.820984', 'step': 17820, 'epoch': 3} {'type': 'loss', 'content': 0.12495410442352295, 'timestamp': '2025-09-30 22:34:13.835876', 'step': 17821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.867714', 'step': 17821, 'epoch': 3} {'type': 'loss', 'content': 0.042869895696640015, 'timestamp': '2025-09-30 22:34:13.877329', 'step': 17822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:13.916830', 'step': 17822, 'epoch': 3} {'type': 'loss', 'content': 0.07024544477462769, 'timestamp': '2025-09-30 22:34:13.920449', 'step': 17823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:13.952482', 'step': 17823, 'epoch': 3} {'type': 'loss', 'content': 0.024159256368875504, 'timestamp': '2025-09-30 22:34:13.981634', 'step': 17824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:14.013024', 'step': 17824, 'epoch': 3} {'type': 'loss', 'content': 0.04790160059928894, 'timestamp': '2025-09-30 22:34:14.021295', 'step': 17825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:14.051484', 'step': 17825, 'epoch': 3} {'type': 'loss', 'content': 0.18162477016448975, 'timestamp': '2025-09-30 22:34:14.054845', 'step': 17826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.086674', 'step': 17826, 'epoch': 3} {'type': 'loss', 'content': 0.06735222041606903, 'timestamp': '2025-09-30 22:34:14.092856', 'step': 17827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.128147', 'step': 17827, 'epoch': 3} {'type': 'loss', 'content': 0.0767245665192604, 'timestamp': '2025-09-30 22:34:14.160750', 'step': 17828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:14.198179', 'step': 17828, 'epoch': 3} {'type': 'loss', 'content': 0.05575679615139961, 'timestamp': '2025-09-30 22:34:14.205515', 'step': 17829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.245434', 'step': 17829, 'epoch': 3} {'type': 'loss', 'content': 0.035697080194950104, 'timestamp': '2025-09-30 22:34:14.248398', 'step': 17830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.279543', 'step': 17830, 'epoch': 3} {'type': 'loss', 'content': 0.059549570083618164, 'timestamp': '2025-09-30 22:34:14.288638', 'step': 17831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:14.331127', 'step': 17831, 'epoch': 3} {'type': 'loss', 'content': 0.07860983163118362, 'timestamp': '2025-09-30 22:34:14.359104', 'step': 17832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:14.394169', 'step': 17832, 'epoch': 3} {'type': 'loss', 'content': 0.08763546496629715, 'timestamp': '2025-09-30 22:34:14.398413', 'step': 17833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:14.431440', 'step': 17833, 'epoch': 3} {'type': 'loss', 'content': 0.04777376353740692, 'timestamp': '2025-09-30 22:34:14.437195', 'step': 17834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.471371', 'step': 17834, 'epoch': 3} {'type': 'loss', 'content': 0.05992261692881584, 'timestamp': '2025-09-30 22:34:14.475022', 'step': 17835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:14.507487', 'step': 17835, 'epoch': 3} {'type': 'loss', 'content': 0.06390374898910522, 'timestamp': '2025-09-30 22:34:14.531685', 'step': 17836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.562286', 'step': 17836, 'epoch': 3} {'type': 'loss', 'content': 0.0686332955956459, 'timestamp': '2025-09-30 22:34:14.567390', 'step': 17837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:14.597836', 'step': 17837, 'epoch': 3} {'type': 'loss', 'content': 0.05212876573204994, 'timestamp': '2025-09-30 22:34:14.602080', 'step': 17838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:14.634393', 'step': 17838, 'epoch': 3} {'type': 'loss', 'content': 0.10407330840826035, 'timestamp': '2025-09-30 22:34:14.637150', 'step': 17839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.668627', 'step': 17839, 'epoch': 3} {'type': 'loss', 'content': 0.1370612233877182, 'timestamp': '2025-09-30 22:34:14.696202', 'step': 17840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:14.729430', 'step': 17840, 'epoch': 3} {'type': 'loss', 'content': 0.1147272139787674, 'timestamp': '2025-09-30 22:34:14.737292', 'step': 17841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:14.772260', 'step': 17841, 'epoch': 3} {'type': 'loss', 'content': 0.039065539836883545, 'timestamp': '2025-09-30 22:34:14.774785', 'step': 17842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:14.813851', 'step': 17842, 'epoch': 3} {'type': 'loss', 'content': 0.06265057623386383, 'timestamp': '2025-09-30 22:34:14.817683', 'step': 17843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:14.849841', 'step': 17843, 'epoch': 3} {'type': 'loss', 'content': 0.079373300075531, 'timestamp': '2025-09-30 22:34:14.875191', 'step': 17844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:14.918247', 'step': 17844, 'epoch': 3} {'type': 'loss', 'content': 0.1223674789071083, 'timestamp': '2025-09-30 22:34:14.921756', 'step': 17845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:14.955014', 'step': 17845, 'epoch': 3} {'type': 'loss', 'content': 0.06747080385684967, 'timestamp': '2025-09-30 22:34:14.967814', 'step': 17846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:15.003998', 'step': 17846, 'epoch': 3} {'type': 'loss', 'content': 0.023224545642733574, 'timestamp': '2025-09-30 22:34:15.008055', 'step': 17847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.042107', 'step': 17847, 'epoch': 3} {'type': 'loss', 'content': 0.04148809611797333, 'timestamp': '2025-09-30 22:34:15.069525', 'step': 17848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:15.103361', 'step': 17848, 'epoch': 3} {'type': 'loss', 'content': 0.12897057831287384, 'timestamp': '2025-09-30 22:34:15.112056', 'step': 17849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:15.144165', 'step': 17849, 'epoch': 3} {'type': 'loss', 'content': 0.07718469947576523, 'timestamp': '2025-09-30 22:34:15.150486', 'step': 17850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.186092', 'step': 17850, 'epoch': 3} {'type': 'loss', 'content': 0.042616985738277435, 'timestamp': '2025-09-30 22:34:15.190789', 'step': 17851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:15.222269', 'step': 17851, 'epoch': 3} {'type': 'loss', 'content': 0.08653924614191055, 'timestamp': '2025-09-30 22:34:15.248788', 'step': 17852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.281347', 'step': 17852, 'epoch': 3} {'type': 'loss', 'content': 0.13668641448020935, 'timestamp': '2025-09-30 22:34:15.287974', 'step': 17853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.323331', 'step': 17853, 'epoch': 3} {'type': 'loss', 'content': 0.12729975581169128, 'timestamp': '2025-09-30 22:34:15.325760', 'step': 17854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.372042', 'step': 17854, 'epoch': 3} {'type': 'loss', 'content': 0.05756235122680664, 'timestamp': '2025-09-30 22:34:15.376832', 'step': 17855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:15.422082', 'step': 17855, 'epoch': 3} {'type': 'loss', 'content': 0.1206396222114563, 'timestamp': '2025-09-30 22:34:15.460381', 'step': 17856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:15.495131', 'step': 17856, 'epoch': 3} {'type': 'loss', 'content': 0.1322178989648819, 'timestamp': '2025-09-30 22:34:15.498562', 'step': 17857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:15.530989', 'step': 17857, 'epoch': 3} {'type': 'loss', 'content': 0.12832540273666382, 'timestamp': '2025-09-30 22:34:15.533990', 'step': 17858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.565557', 'step': 17858, 'epoch': 3} {'type': 'loss', 'content': 0.1058988869190216, 'timestamp': '2025-09-30 22:34:15.575350', 'step': 17859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:15.608099', 'step': 17859, 'epoch': 3} {'type': 'loss', 'content': 0.11935770511627197, 'timestamp': '2025-09-30 22:34:15.632897', 'step': 17860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.667333', 'step': 17860, 'epoch': 3} {'type': 'loss', 'content': 0.09461376816034317, 'timestamp': '2025-09-30 22:34:15.670122', 'step': 17861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.704454', 'step': 17861, 'epoch': 3} {'type': 'loss', 'content': 0.05854753404855728, 'timestamp': '2025-09-30 22:34:15.709114', 'step': 17862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.741169', 'step': 17862, 'epoch': 3} {'type': 'loss', 'content': 0.06571044772863388, 'timestamp': '2025-09-30 22:34:15.747328', 'step': 17863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:15.783159', 'step': 17863, 'epoch': 3} {'type': 'loss', 'content': 0.03966508433222771, 'timestamp': '2025-09-30 22:34:15.813372', 'step': 17864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.847863', 'step': 17864, 'epoch': 3} {'type': 'loss', 'content': 0.08196001499891281, 'timestamp': '2025-09-30 22:34:15.864404', 'step': 17865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:15.901106', 'step': 17865, 'epoch': 3} {'type': 'loss', 'content': 0.053602952510118484, 'timestamp': '2025-09-30 22:34:15.904845', 'step': 17866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:15.937177', 'step': 17866, 'epoch': 3} {'type': 'loss', 'content': 0.09411261230707169, 'timestamp': '2025-09-30 22:34:15.948324', 'step': 17867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:15.984564', 'step': 17867, 'epoch': 3} {'type': 'loss', 'content': 0.07390367239713669, 'timestamp': '2025-09-30 22:34:16.008935', 'step': 17868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:16.041078', 'step': 17868, 'epoch': 3} {'type': 'loss', 'content': 0.050157174468040466, 'timestamp': '2025-09-30 22:34:16.045613', 'step': 17869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.077544', 'step': 17869, 'epoch': 3} {'type': 'loss', 'content': 0.044231120496988297, 'timestamp': '2025-09-30 22:34:16.090370', 'step': 17870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.124480', 'step': 17870, 'epoch': 3} {'type': 'loss', 'content': 0.05128762125968933, 'timestamp': '2025-09-30 22:34:16.136390', 'step': 17871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:16.168767', 'step': 17871, 'epoch': 3} {'type': 'loss', 'content': 0.16655157506465912, 'timestamp': '2025-09-30 22:34:16.194975', 'step': 17872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.226759', 'step': 17872, 'epoch': 3} {'type': 'loss', 'content': 0.09120006859302521, 'timestamp': '2025-09-30 22:34:16.231126', 'step': 17873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.264433', 'step': 17873, 'epoch': 3} {'type': 'loss', 'content': 0.12473003566265106, 'timestamp': '2025-09-30 22:34:16.280012', 'step': 17874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:16.312523', 'step': 17874, 'epoch': 3} {'type': 'loss', 'content': 0.033753518015146255, 'timestamp': '2025-09-30 22:34:16.322053', 'step': 17875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:16.353797', 'step': 17875, 'epoch': 3} {'type': 'loss', 'content': 0.09910710155963898, 'timestamp': '2025-09-30 22:34:16.390525', 'step': 17876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.425849', 'step': 17876, 'epoch': 3} {'type': 'loss', 'content': 0.06109039485454559, 'timestamp': '2025-09-30 22:34:16.429996', 'step': 17877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:16.462419', 'step': 17877, 'epoch': 3} {'type': 'loss', 'content': 0.1465386152267456, 'timestamp': '2025-09-30 22:34:16.473732', 'step': 17878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.505561', 'step': 17878, 'epoch': 3} {'type': 'loss', 'content': 0.07826268672943115, 'timestamp': '2025-09-30 22:34:16.516901', 'step': 17879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:16.549459', 'step': 17879, 'epoch': 3} {'type': 'loss', 'content': 0.17141838371753693, 'timestamp': '2025-09-30 22:34:16.574452', 'step': 17880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:16.609325', 'step': 17880, 'epoch': 3} {'type': 'loss', 'content': 0.08630350232124329, 'timestamp': '2025-09-30 22:34:16.616609', 'step': 17881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:16.651512', 'step': 17881, 'epoch': 3} {'type': 'loss', 'content': 0.09577241539955139, 'timestamp': '2025-09-30 22:34:16.654935', 'step': 17882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:16.686951', 'step': 17882, 'epoch': 3} {'type': 'loss', 'content': 0.10170372575521469, 'timestamp': '2025-09-30 22:34:16.693526', 'step': 17883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.731121', 'step': 17883, 'epoch': 3} {'type': 'loss', 'content': 0.1154661476612091, 'timestamp': '2025-09-30 22:34:16.756738', 'step': 17884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.793752', 'step': 17884, 'epoch': 3} {'type': 'loss', 'content': 0.07512932270765305, 'timestamp': '2025-09-30 22:34:16.804426', 'step': 17885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:16.836872', 'step': 17885, 'epoch': 3} {'type': 'loss', 'content': 0.08999565243721008, 'timestamp': '2025-09-30 22:34:16.840010', 'step': 17886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.877996', 'step': 17886, 'epoch': 3} {'type': 'loss', 'content': 0.08207211643457413, 'timestamp': '2025-09-30 22:34:16.880780', 'step': 17887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:16.911464', 'step': 17887, 'epoch': 3} {'type': 'loss', 'content': 0.03736229985952377, 'timestamp': '2025-09-30 22:34:16.937990', 'step': 17888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:16.974115', 'step': 17888, 'epoch': 3} {'type': 'loss', 'content': 0.08492512255907059, 'timestamp': '2025-09-30 22:34:16.976924', 'step': 17889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:17.009506', 'step': 17889, 'epoch': 3} {'type': 'loss', 'content': 0.07128742337226868, 'timestamp': '2025-09-30 22:34:17.013798', 'step': 17890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:17.052167', 'step': 17890, 'epoch': 3} {'type': 'loss', 'content': 0.09257543832063675, 'timestamp': '2025-09-30 22:34:17.060221', 'step': 17891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:17.095144', 'step': 17891, 'epoch': 3} {'type': 'loss', 'content': 0.13029128313064575, 'timestamp': '2025-09-30 22:34:17.123305', 'step': 17892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:17.156555', 'step': 17892, 'epoch': 3} {'type': 'loss', 'content': 0.11173545569181442, 'timestamp': '2025-09-30 22:34:17.160267', 'step': 17893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:17.190890', 'step': 17893, 'epoch': 3} {'type': 'loss', 'content': 0.03240267187356949, 'timestamp': '2025-09-30 22:34:17.195223', 'step': 17894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:17.228557', 'step': 17894, 'epoch': 3} {'type': 'loss', 'content': 0.0462791807949543, 'timestamp': '2025-09-30 22:34:17.232903', 'step': 17895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:17.264877', 'step': 17895, 'epoch': 3} {'type': 'loss', 'content': 0.05082348734140396, 'timestamp': '2025-09-30 22:34:17.290516', 'step': 17896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:34:17.336467', 'step': 17896, 'epoch': 3} {'type': 'loss', 'content': 0.0650373250246048, 'timestamp': '2025-09-30 22:34:17.342878', 'step': 17897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:17.374012', 'step': 17897, 'epoch': 3} {'type': 'loss', 'content': 0.04566800221800804, 'timestamp': '2025-09-30 22:34:17.377419', 'step': 17898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:17.408391', 'step': 17898, 'epoch': 3} {'type': 'loss', 'content': 0.03971419483423233, 'timestamp': '2025-09-30 22:34:17.413206', 'step': 17899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:17.446366', 'step': 17899, 'epoch': 3} {'type': 'loss', 'content': 0.07295190542936325, 'timestamp': '2025-09-30 22:34:17.471829', 'step': 17900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:17.503184', 'step': 17900, 'epoch': 3} {'type': 'loss', 'content': 0.08061141520738602, 'timestamp': '2025-09-30 22:34:17.507256', 'step': 17901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:17.541387', 'step': 17901, 'epoch': 3} {'type': 'loss', 'content': 0.06231134384870529, 'timestamp': '2025-09-30 22:34:17.546175', 'step': 17902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:17.582271', 'step': 17902, 'epoch': 3} {'type': 'loss', 'content': 0.046603426337242126, 'timestamp': '2025-09-30 22:34:17.586232', 'step': 17903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:17.631305', 'step': 17903, 'epoch': 3} {'type': 'loss', 'content': 0.07628461718559265, 'timestamp': '2025-09-30 22:34:17.663428', 'step': 17904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:17.695166', 'step': 17904, 'epoch': 3} {'type': 'loss', 'content': 0.05623781681060791, 'timestamp': '2025-09-30 22:34:17.701289', 'step': 17905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:17.735848', 'step': 17905, 'epoch': 3} {'type': 'loss', 'content': 0.04451233148574829, 'timestamp': '2025-09-30 22:34:17.741301', 'step': 17906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:17.781369', 'step': 17906, 'epoch': 3} {'type': 'loss', 'content': 0.04144524410367012, 'timestamp': '2025-09-30 22:34:17.784782', 'step': 17907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:17.816307', 'step': 17907, 'epoch': 3} {'type': 'loss', 'content': 0.09344605356454849, 'timestamp': '2025-09-30 22:34:17.853762', 'step': 17908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:17.885645', 'step': 17908, 'epoch': 3} {'type': 'loss', 'content': 0.13504259288311005, 'timestamp': '2025-09-30 22:34:17.889337', 'step': 17909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:17.928273', 'step': 17909, 'epoch': 3} {'type': 'loss', 'content': 0.0735500231385231, 'timestamp': '2025-09-30 22:34:17.937493', 'step': 17910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:17.974067', 'step': 17910, 'epoch': 3} {'type': 'loss', 'content': 0.09278849512338638, 'timestamp': '2025-09-30 22:34:17.978467', 'step': 17911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.010389', 'step': 17911, 'epoch': 3} {'type': 'loss', 'content': 0.08627768605947495, 'timestamp': '2025-09-30 22:34:18.036493', 'step': 17912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:18.071208', 'step': 17912, 'epoch': 3} {'type': 'loss', 'content': 0.09292905032634735, 'timestamp': '2025-09-30 22:34:18.074767', 'step': 17913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.106842', 'step': 17913, 'epoch': 3} {'type': 'loss', 'content': 0.09506836533546448, 'timestamp': '2025-09-30 22:34:18.111401', 'step': 17914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.144335', 'step': 17914, 'epoch': 3} {'type': 'loss', 'content': 0.07716574519872665, 'timestamp': '2025-09-30 22:34:18.147681', 'step': 17915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:18.179622', 'step': 17915, 'epoch': 3} {'type': 'loss', 'content': 0.04480605199933052, 'timestamp': '2025-09-30 22:34:18.206389', 'step': 17916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.238103', 'step': 17916, 'epoch': 3} {'type': 'loss', 'content': 0.06381241232156754, 'timestamp': '2025-09-30 22:34:18.248803', 'step': 17917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:18.283949', 'step': 17917, 'epoch': 3} {'type': 'loss', 'content': 0.06889600306749344, 'timestamp': '2025-09-30 22:34:18.286798', 'step': 17918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:18.318556', 'step': 17918, 'epoch': 3} {'type': 'loss', 'content': 0.06640032678842545, 'timestamp': '2025-09-30 22:34:18.336353', 'step': 17919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:18.374341', 'step': 17919, 'epoch': 3} {'type': 'loss', 'content': 0.045241858810186386, 'timestamp': '2025-09-30 22:34:18.400233', 'step': 17920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.436357', 'step': 17920, 'epoch': 3} {'type': 'loss', 'content': 0.04547562077641487, 'timestamp': '2025-09-30 22:34:18.439862', 'step': 17921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.471928', 'step': 17921, 'epoch': 3} {'type': 'loss', 'content': 0.1048593744635582, 'timestamp': '2025-09-30 22:34:18.475026', 'step': 17922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.506052', 'step': 17922, 'epoch': 3} {'type': 'loss', 'content': 0.06502262502908707, 'timestamp': '2025-09-30 22:34:18.522677', 'step': 17923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.561836', 'step': 17923, 'epoch': 3} {'type': 'loss', 'content': 0.05045958608388901, 'timestamp': '2025-09-30 22:34:18.586641', 'step': 17924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:18.623095', 'step': 17924, 'epoch': 3} {'type': 'loss', 'content': 0.05582847073674202, 'timestamp': '2025-09-30 22:34:18.630711', 'step': 17925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.664599', 'step': 17925, 'epoch': 3} {'type': 'loss', 'content': 0.09687843918800354, 'timestamp': '2025-09-30 22:34:18.669299', 'step': 17926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:18.700996', 'step': 17926, 'epoch': 3} {'type': 'loss', 'content': 0.08452290296554565, 'timestamp': '2025-09-30 22:34:18.706445', 'step': 17927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.739517', 'step': 17927, 'epoch': 3} {'type': 'loss', 'content': 0.024627644568681717, 'timestamp': '2025-09-30 22:34:18.765401', 'step': 17928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:18.799829', 'step': 17928, 'epoch': 3} {'type': 'loss', 'content': 0.07797641307115555, 'timestamp': '2025-09-30 22:34:18.802670', 'step': 17929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:18.834261', 'step': 17929, 'epoch': 3} {'type': 'loss', 'content': 0.14936065673828125, 'timestamp': '2025-09-30 22:34:18.837072', 'step': 17930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:18.872372', 'step': 17930, 'epoch': 3} {'type': 'loss', 'content': 0.04613044485449791, 'timestamp': '2025-09-30 22:34:18.881521', 'step': 17931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.919768', 'step': 17931, 'epoch': 3} {'type': 'loss', 'content': 0.004634110722690821, 'timestamp': '2025-09-30 22:34:18.946467', 'step': 17932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:18.983908', 'step': 17932, 'epoch': 3} {'type': 'loss', 'content': 0.05807575210928917, 'timestamp': '2025-09-30 22:34:18.987599', 'step': 17933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:19.020572', 'step': 17933, 'epoch': 3} {'type': 'loss', 'content': 0.0438416488468647, 'timestamp': '2025-09-30 22:34:19.032375', 'step': 17934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.065892', 'step': 17934, 'epoch': 3} {'type': 'loss', 'content': 0.02051340602338314, 'timestamp': '2025-09-30 22:34:19.070450', 'step': 17935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.101555', 'step': 17935, 'epoch': 3} {'type': 'loss', 'content': 0.09262363612651825, 'timestamp': '2025-09-30 22:34:19.127131', 'step': 17936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:19.163034', 'step': 17936, 'epoch': 3} {'type': 'loss', 'content': 0.045534662902355194, 'timestamp': '2025-09-30 22:34:19.167592', 'step': 17937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:19.198847', 'step': 17937, 'epoch': 3} {'type': 'loss', 'content': 0.11293566226959229, 'timestamp': '2025-09-30 22:34:19.201449', 'step': 17938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.232982', 'step': 17938, 'epoch': 3} {'type': 'loss', 'content': 0.022613244131207466, 'timestamp': '2025-09-30 22:34:19.236871', 'step': 17939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:19.277872', 'step': 17939, 'epoch': 3} {'type': 'loss', 'content': 0.047366902232170105, 'timestamp': '2025-09-30 22:34:19.307130', 'step': 17940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.338446', 'step': 17940, 'epoch': 3} {'type': 'loss', 'content': 0.058853331953287125, 'timestamp': '2025-09-30 22:34:19.341052', 'step': 17941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:19.372359', 'step': 17941, 'epoch': 3} {'type': 'loss', 'content': 0.07058296352624893, 'timestamp': '2025-09-30 22:34:19.376560', 'step': 17942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.409691', 'step': 17942, 'epoch': 3} {'type': 'loss', 'content': 0.0761953592300415, 'timestamp': '2025-09-30 22:34:19.413116', 'step': 17943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:19.443521', 'step': 17943, 'epoch': 3} {'type': 'loss', 'content': 0.035797521471977234, 'timestamp': '2025-09-30 22:34:19.469627', 'step': 17944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:19.508024', 'step': 17944, 'epoch': 3} {'type': 'loss', 'content': 0.06364965438842773, 'timestamp': '2025-09-30 22:34:19.511296', 'step': 17945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.547435', 'step': 17945, 'epoch': 3} {'type': 'loss', 'content': 0.011395406909286976, 'timestamp': '2025-09-30 22:34:19.550251', 'step': 17946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:19.580468', 'step': 17946, 'epoch': 3} {'type': 'loss', 'content': 0.0829348936676979, 'timestamp': '2025-09-30 22:34:19.589167', 'step': 17947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:19.627488', 'step': 17947, 'epoch': 3} {'type': 'loss', 'content': 0.08304431289434433, 'timestamp': '2025-09-30 22:34:19.652311', 'step': 17948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.691277', 'step': 17948, 'epoch': 3} {'type': 'loss', 'content': 0.054698213934898376, 'timestamp': '2025-09-30 22:34:19.695358', 'step': 17949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.728179', 'step': 17949, 'epoch': 3} {'type': 'loss', 'content': 0.07937674224376678, 'timestamp': '2025-09-30 22:34:19.734043', 'step': 17950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.775495', 'step': 17950, 'epoch': 3} {'type': 'loss', 'content': 0.0733114629983902, 'timestamp': '2025-09-30 22:34:19.782628', 'step': 17951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.824808', 'step': 17951, 'epoch': 3} {'type': 'loss', 'content': 0.05707642436027527, 'timestamp': '2025-09-30 22:34:19.851511', 'step': 17952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:19.895916', 'step': 17952, 'epoch': 3} {'type': 'loss', 'content': 0.08835671097040176, 'timestamp': '2025-09-30 22:34:19.900365', 'step': 17953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:19.931107', 'step': 17953, 'epoch': 3} {'type': 'loss', 'content': 0.07204436510801315, 'timestamp': '2025-09-30 22:34:19.934911', 'step': 17954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:19.967017', 'step': 17954, 'epoch': 3} {'type': 'loss', 'content': 0.049091316759586334, 'timestamp': '2025-09-30 22:34:19.970812', 'step': 17955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:20.002640', 'step': 17955, 'epoch': 3} {'type': 'loss', 'content': 0.12213987857103348, 'timestamp': '2025-09-30 22:34:20.027980', 'step': 17956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:20.060460', 'step': 17956, 'epoch': 3} {'type': 'loss', 'content': 0.03914474695920944, 'timestamp': '2025-09-30 22:34:20.066264', 'step': 17957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:20.099812', 'step': 17957, 'epoch': 3} {'type': 'loss', 'content': 0.12033653259277344, 'timestamp': '2025-09-30 22:34:20.103798', 'step': 17958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.145802', 'step': 17958, 'epoch': 3} {'type': 'loss', 'content': 0.027646729722619057, 'timestamp': '2025-09-30 22:34:20.149281', 'step': 17959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:20.181641', 'step': 17959, 'epoch': 3} {'type': 'loss', 'content': 0.08944153040647507, 'timestamp': '2025-09-30 22:34:20.213148', 'step': 17960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:20.244223', 'step': 17960, 'epoch': 3} {'type': 'loss', 'content': 0.04660314321517944, 'timestamp': '2025-09-30 22:34:20.248996', 'step': 17961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.286603', 'step': 17961, 'epoch': 3} {'type': 'loss', 'content': 0.05409831553697586, 'timestamp': '2025-09-30 22:34:20.295514', 'step': 17962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.340370', 'step': 17962, 'epoch': 3} {'type': 'loss', 'content': 0.08499912917613983, 'timestamp': '2025-09-30 22:34:20.344182', 'step': 17963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:20.381675', 'step': 17963, 'epoch': 3} {'type': 'loss', 'content': 0.06987379491329193, 'timestamp': '2025-09-30 22:34:20.411021', 'step': 17964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:20.459480', 'step': 17964, 'epoch': 3} {'type': 'loss', 'content': 0.13531732559204102, 'timestamp': '2025-09-30 22:34:20.463568', 'step': 17965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.508741', 'step': 17965, 'epoch': 3} {'type': 'loss', 'content': 0.04952765256166458, 'timestamp': '2025-09-30 22:34:20.518810', 'step': 17966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:20.558788', 'step': 17966, 'epoch': 3} {'type': 'loss', 'content': 0.05480343848466873, 'timestamp': '2025-09-30 22:34:20.566057', 'step': 17967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:20.603009', 'step': 17967, 'epoch': 3} {'type': 'loss', 'content': 0.061065953224897385, 'timestamp': '2025-09-30 22:34:20.626897', 'step': 17968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:20.665493', 'step': 17968, 'epoch': 3} {'type': 'loss', 'content': 0.045680541545152664, 'timestamp': '2025-09-30 22:34:20.676617', 'step': 17969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:20.710502', 'step': 17969, 'epoch': 3} {'type': 'loss', 'content': 0.03436891734600067, 'timestamp': '2025-09-30 22:34:20.719051', 'step': 17970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.751520', 'step': 17970, 'epoch': 3} {'type': 'loss', 'content': 0.10523604601621628, 'timestamp': '2025-09-30 22:34:20.755121', 'step': 17971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.794814', 'step': 17971, 'epoch': 3} {'type': 'loss', 'content': 0.07929142564535141, 'timestamp': '2025-09-30 22:34:20.819754', 'step': 17972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.858922', 'step': 17972, 'epoch': 3} {'type': 'loss', 'content': 0.1075398251414299, 'timestamp': '2025-09-30 22:34:20.864026', 'step': 17973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:20.896438', 'step': 17973, 'epoch': 3} {'type': 'loss', 'content': 0.043949760496616364, 'timestamp': '2025-09-30 22:34:20.910045', 'step': 17974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:20.943042', 'step': 17974, 'epoch': 3} {'type': 'loss', 'content': 0.010680995881557465, 'timestamp': '2025-09-30 22:34:20.956047', 'step': 17975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:20.988345', 'step': 17975, 'epoch': 3} {'type': 'loss', 'content': 0.06384050846099854, 'timestamp': '2025-09-30 22:34:21.024653', 'step': 17976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.058077', 'step': 17976, 'epoch': 3} {'type': 'loss', 'content': 0.033781055361032486, 'timestamp': '2025-09-30 22:34:21.062640', 'step': 17977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.096447', 'step': 17977, 'epoch': 3} {'type': 'loss', 'content': 0.11049416661262512, 'timestamp': '2025-09-30 22:34:21.101153', 'step': 17978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:21.133178', 'step': 17978, 'epoch': 3} {'type': 'loss', 'content': 0.09096750617027283, 'timestamp': '2025-09-30 22:34:21.139139', 'step': 17979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.173282', 'step': 17979, 'epoch': 3} {'type': 'loss', 'content': 0.13219459354877472, 'timestamp': '2025-09-30 22:34:21.199411', 'step': 17980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:21.230477', 'step': 17980, 'epoch': 3} {'type': 'loss', 'content': 0.003699881723150611, 'timestamp': '2025-09-30 22:34:21.239769', 'step': 17981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.272192', 'step': 17981, 'epoch': 3} {'type': 'loss', 'content': 0.02858772873878479, 'timestamp': '2025-09-30 22:34:21.275114', 'step': 17982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:21.309069', 'step': 17982, 'epoch': 3} {'type': 'loss', 'content': 0.055293262004852295, 'timestamp': '2025-09-30 22:34:21.313442', 'step': 17983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.344924', 'step': 17983, 'epoch': 3} {'type': 'loss', 'content': 0.07121684402227402, 'timestamp': '2025-09-30 22:34:21.377819', 'step': 17984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:21.412536', 'step': 17984, 'epoch': 3} {'type': 'loss', 'content': 0.05260813608765602, 'timestamp': '2025-09-30 22:34:21.418424', 'step': 17985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:21.453521', 'step': 17985, 'epoch': 3} {'type': 'loss', 'content': 0.047669392079114914, 'timestamp': '2025-09-30 22:34:21.459047', 'step': 17986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:21.491609', 'step': 17986, 'epoch': 3} {'type': 'loss', 'content': 0.030114218592643738, 'timestamp': '2025-09-30 22:34:21.494466', 'step': 17987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:21.532278', 'step': 17987, 'epoch': 3} {'type': 'loss', 'content': 0.05144810676574707, 'timestamp': '2025-09-30 22:34:21.557818', 'step': 17988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:21.591705', 'step': 17988, 'epoch': 3} {'type': 'loss', 'content': 0.13974155485630035, 'timestamp': '2025-09-30 22:34:21.594892', 'step': 17989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.633738', 'step': 17989, 'epoch': 3} {'type': 'loss', 'content': 0.08172056078910828, 'timestamp': '2025-09-30 22:34:21.636697', 'step': 17990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.669543', 'step': 17990, 'epoch': 3} {'type': 'loss', 'content': 0.05464742332696915, 'timestamp': '2025-09-30 22:34:21.679394', 'step': 17991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.712464', 'step': 17991, 'epoch': 3} {'type': 'loss', 'content': 0.028848988935351372, 'timestamp': '2025-09-30 22:34:21.739082', 'step': 17992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:21.774966', 'step': 17992, 'epoch': 3} {'type': 'loss', 'content': 0.08159468322992325, 'timestamp': '2025-09-30 22:34:21.778933', 'step': 17993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.811744', 'step': 17993, 'epoch': 3} {'type': 'loss', 'content': 0.06375225633382797, 'timestamp': '2025-09-30 22:34:21.823001', 'step': 17994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:21.867105', 'step': 17994, 'epoch': 3} {'type': 'loss', 'content': 0.04658827185630798, 'timestamp': '2025-09-30 22:34:21.879165', 'step': 17995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:21.912502', 'step': 17995, 'epoch': 3} {'type': 'loss', 'content': 0.019279280677437782, 'timestamp': '2025-09-30 22:34:21.939693', 'step': 17996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:21.972109', 'step': 17996, 'epoch': 3} {'type': 'loss', 'content': 0.11058537662029266, 'timestamp': '2025-09-30 22:34:21.976836', 'step': 17997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:22.011120', 'step': 17997, 'epoch': 3} {'type': 'loss', 'content': 0.05329246073961258, 'timestamp': '2025-09-30 22:34:22.013775', 'step': 17998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:22.045268', 'step': 17998, 'epoch': 3} {'type': 'loss', 'content': 0.054193150252103806, 'timestamp': '2025-09-30 22:34:22.050815', 'step': 17999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:22.082044', 'step': 17999, 'epoch': 3} {'type': 'loss', 'content': 0.15890227258205414, 'timestamp': '2025-09-30 22:34:22.108526', 'step': 18000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18000', 'timestamp': '2025-09-30 22:34:27.261654', 'step': 18000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:27.310181', 'step': 18000, 'epoch': 3} {'type': 'loss', 'content': 0.09856975078582764, 'timestamp': '2025-09-30 22:34:27.315784', 'step': 18001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:27.350131', 'step': 18001, 'epoch': 3} {'type': 'loss', 'content': 0.03510233387351036, 'timestamp': '2025-09-30 22:34:27.352689', 'step': 18002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:27.383826', 'step': 18002, 'epoch': 3} {'type': 'loss', 'content': 0.061614990234375, 'timestamp': '2025-09-30 22:34:27.389833', 'step': 18003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:27.423626', 'step': 18003, 'epoch': 3} {'type': 'loss', 'content': 0.04091613367199898, 'timestamp': '2025-09-30 22:34:27.458020', 'step': 18004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:27.493193', 'step': 18004, 'epoch': 3} {'type': 'loss', 'content': 0.03357774391770363, 'timestamp': '2025-09-30 22:34:27.509895', 'step': 18005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:27.553149', 'step': 18005, 'epoch': 3} {'type': 'loss', 'content': 0.04619348421692848, 'timestamp': '2025-09-30 22:34:27.562610', 'step': 18006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:27.596461', 'step': 18006, 'epoch': 3} {'type': 'loss', 'content': 0.03797009214758873, 'timestamp': '2025-09-30 22:34:27.601901', 'step': 18007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:27.636782', 'step': 18007, 'epoch': 3} {'type': 'loss', 'content': 0.057879313826560974, 'timestamp': '2025-09-30 22:34:27.661729', 'step': 18008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:27.693425', 'step': 18008, 'epoch': 3} {'type': 'loss', 'content': 0.04651077836751938, 'timestamp': '2025-09-30 22:34:27.698031', 'step': 18009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:27.740103', 'step': 18009, 'epoch': 3} {'type': 'loss', 'content': 0.03824565187096596, 'timestamp': '2025-09-30 22:34:27.743904', 'step': 18010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:27.777960', 'step': 18010, 'epoch': 3} {'type': 'loss', 'content': 0.13446003198623657, 'timestamp': '2025-09-30 22:34:27.792279', 'step': 18011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:27.826669', 'step': 18011, 'epoch': 3} {'type': 'loss', 'content': 0.10129386931657791, 'timestamp': '2025-09-30 22:34:27.852992', 'step': 18012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:27.891780', 'step': 18012, 'epoch': 3} {'type': 'loss', 'content': 0.07578539848327637, 'timestamp': '2025-09-30 22:34:27.896004', 'step': 18013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:27.935901', 'step': 18013, 'epoch': 3} {'type': 'loss', 'content': 0.05356907844543457, 'timestamp': '2025-09-30 22:34:27.940949', 'step': 18014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:27.973670', 'step': 18014, 'epoch': 3} {'type': 'loss', 'content': 0.05526250600814819, 'timestamp': '2025-09-30 22:34:27.978389', 'step': 18015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:28.012939', 'step': 18015, 'epoch': 3} {'type': 'loss', 'content': 0.05943244695663452, 'timestamp': '2025-09-30 22:34:28.039147', 'step': 18016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:28.072296', 'step': 18016, 'epoch': 3} {'type': 'loss', 'content': 0.0561249777674675, 'timestamp': '2025-09-30 22:34:28.084616', 'step': 18017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.117859', 'step': 18017, 'epoch': 3} {'type': 'loss', 'content': 0.02981361746788025, 'timestamp': '2025-09-30 22:34:28.135887', 'step': 18018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.172083', 'step': 18018, 'epoch': 3} {'type': 'loss', 'content': 0.020818131044507027, 'timestamp': '2025-09-30 22:34:28.176903', 'step': 18019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:28.210338', 'step': 18019, 'epoch': 3} {'type': 'loss', 'content': 0.04924428090453148, 'timestamp': '2025-09-30 22:34:28.236630', 'step': 18020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:28.269206', 'step': 18020, 'epoch': 3} {'type': 'loss', 'content': 0.1128806471824646, 'timestamp': '2025-09-30 22:34:28.274227', 'step': 18021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.308098', 'step': 18021, 'epoch': 3} {'type': 'loss', 'content': 0.02192111499607563, 'timestamp': '2025-09-30 22:34:28.311436', 'step': 18022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:28.344060', 'step': 18022, 'epoch': 3} {'type': 'loss', 'content': 0.11957760155200958, 'timestamp': '2025-09-30 22:34:28.349526', 'step': 18023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.385787', 'step': 18023, 'epoch': 3} {'type': 'loss', 'content': 0.048542436212301254, 'timestamp': '2025-09-30 22:34:28.411895', 'step': 18024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:28.452179', 'step': 18024, 'epoch': 3} {'type': 'loss', 'content': 0.08801846206188202, 'timestamp': '2025-09-30 22:34:28.467878', 'step': 18025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:28.500557', 'step': 18025, 'epoch': 3} {'type': 'loss', 'content': 0.08187125623226166, 'timestamp': '2025-09-30 22:34:28.505088', 'step': 18026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:28.540901', 'step': 18026, 'epoch': 3} {'type': 'loss', 'content': 0.06236346811056137, 'timestamp': '2025-09-30 22:34:28.546030', 'step': 18027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:28.586527', 'step': 18027, 'epoch': 3} {'type': 'loss', 'content': 0.022839413955807686, 'timestamp': '2025-09-30 22:34:28.611746', 'step': 18028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.646858', 'step': 18028, 'epoch': 3} {'type': 'loss', 'content': 0.024294937029480934, 'timestamp': '2025-09-30 22:34:28.652257', 'step': 18029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:28.690963', 'step': 18029, 'epoch': 3} {'type': 'loss', 'content': 0.04682433232665062, 'timestamp': '2025-09-30 22:34:28.694592', 'step': 18030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:28.727899', 'step': 18030, 'epoch': 3} {'type': 'loss', 'content': 0.049590885639190674, 'timestamp': '2025-09-30 22:34:28.731982', 'step': 18031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.764625', 'step': 18031, 'epoch': 3} {'type': 'loss', 'content': 0.07080414891242981, 'timestamp': '2025-09-30 22:34:28.799857', 'step': 18032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:28.832377', 'step': 18032, 'epoch': 3} {'type': 'loss', 'content': 0.05535869300365448, 'timestamp': '2025-09-30 22:34:28.837882', 'step': 18033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:28.871368', 'step': 18033, 'epoch': 3} {'type': 'loss', 'content': 0.05385696887969971, 'timestamp': '2025-09-30 22:34:28.875849', 'step': 18034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:28.909600', 'step': 18034, 'epoch': 3} {'type': 'loss', 'content': 0.0533178374171257, 'timestamp': '2025-09-30 22:34:28.915039', 'step': 18035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:28.958750', 'step': 18035, 'epoch': 3} {'type': 'loss', 'content': 0.13051830232143402, 'timestamp': '2025-09-30 22:34:28.993479', 'step': 18036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.025909', 'step': 18036, 'epoch': 3} {'type': 'loss', 'content': 0.13783127069473267, 'timestamp': '2025-09-30 22:34:29.029958', 'step': 18037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:29.061134', 'step': 18037, 'epoch': 3} {'type': 'loss', 'content': 0.024698475375771523, 'timestamp': '2025-09-30 22:34:29.067300', 'step': 18038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:29.117030', 'step': 18038, 'epoch': 3} {'type': 'loss', 'content': 0.1005084216594696, 'timestamp': '2025-09-30 22:34:29.121566', 'step': 18039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:29.153619', 'step': 18039, 'epoch': 3} {'type': 'loss', 'content': 0.00131953542586416, 'timestamp': '2025-09-30 22:34:29.189142', 'step': 18040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:29.222293', 'step': 18040, 'epoch': 3} {'type': 'loss', 'content': 0.03137499839067459, 'timestamp': '2025-09-30 22:34:29.226677', 'step': 18041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:29.259348', 'step': 18041, 'epoch': 3} {'type': 'loss', 'content': 0.02182723581790924, 'timestamp': '2025-09-30 22:34:29.263846', 'step': 18042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.302475', 'step': 18042, 'epoch': 3} {'type': 'loss', 'content': 0.046655330806970596, 'timestamp': '2025-09-30 22:34:29.306639', 'step': 18043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.343609', 'step': 18043, 'epoch': 3} {'type': 'loss', 'content': 0.05989120528101921, 'timestamp': '2025-09-30 22:34:29.376399', 'step': 18044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:29.409517', 'step': 18044, 'epoch': 3} {'type': 'loss', 'content': 0.050284478813409805, 'timestamp': '2025-09-30 22:34:29.423255', 'step': 18045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:29.462570', 'step': 18045, 'epoch': 3} {'type': 'loss', 'content': 0.09656058251857758, 'timestamp': '2025-09-30 22:34:29.474737', 'step': 18046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:29.506576', 'step': 18046, 'epoch': 3} {'type': 'loss', 'content': 0.008447394706308842, 'timestamp': '2025-09-30 22:34:29.510350', 'step': 18047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:29.541937', 'step': 18047, 'epoch': 3} {'type': 'loss', 'content': 0.04380619153380394, 'timestamp': '2025-09-30 22:34:29.567270', 'step': 18048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:29.602882', 'step': 18048, 'epoch': 3} {'type': 'loss', 'content': 0.03744909167289734, 'timestamp': '2025-09-30 22:34:29.615800', 'step': 18049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.648507', 'step': 18049, 'epoch': 3} {'type': 'loss', 'content': 0.06711092591285706, 'timestamp': '2025-09-30 22:34:29.652426', 'step': 18050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.693233', 'step': 18050, 'epoch': 3} {'type': 'loss', 'content': 0.08762619644403458, 'timestamp': '2025-09-30 22:34:29.704307', 'step': 18051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:29.736512', 'step': 18051, 'epoch': 3} {'type': 'loss', 'content': 0.09557024389505386, 'timestamp': '2025-09-30 22:34:29.763050', 'step': 18052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.800383', 'step': 18052, 'epoch': 3} {'type': 'loss', 'content': 0.12638801336288452, 'timestamp': '2025-09-30 22:34:29.806112', 'step': 18053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:29.846179', 'step': 18053, 'epoch': 3} {'type': 'loss', 'content': 0.09610176831483841, 'timestamp': '2025-09-30 22:34:29.851453', 'step': 18054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.891329', 'step': 18054, 'epoch': 3} {'type': 'loss', 'content': 0.05576195940375328, 'timestamp': '2025-09-30 22:34:29.895734', 'step': 18055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:29.929098', 'step': 18055, 'epoch': 3} {'type': 'loss', 'content': 0.1368628889322281, 'timestamp': '2025-09-30 22:34:29.963058', 'step': 18056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:30.003622', 'step': 18056, 'epoch': 3} {'type': 'loss', 'content': 0.08510874211788177, 'timestamp': '2025-09-30 22:34:30.020739', 'step': 18057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.062276', 'step': 18057, 'epoch': 3} {'type': 'loss', 'content': 0.08009567111730576, 'timestamp': '2025-09-30 22:34:30.066405', 'step': 18058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:30.097728', 'step': 18058, 'epoch': 3} {'type': 'loss', 'content': 0.07670113444328308, 'timestamp': '2025-09-30 22:34:30.101583', 'step': 18059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.133578', 'step': 18059, 'epoch': 3} {'type': 'loss', 'content': 0.09288115054368973, 'timestamp': '2025-09-30 22:34:30.173976', 'step': 18060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:30.209160', 'step': 18060, 'epoch': 3} {'type': 'loss', 'content': 0.06702616065740585, 'timestamp': '2025-09-30 22:34:30.213113', 'step': 18061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.248574', 'step': 18061, 'epoch': 3} {'type': 'loss', 'content': 0.023490600287914276, 'timestamp': '2025-09-30 22:34:30.265281', 'step': 18062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:30.298905', 'step': 18062, 'epoch': 3} {'type': 'loss', 'content': 0.10144991427659988, 'timestamp': '2025-09-30 22:34:30.306028', 'step': 18063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.356602', 'step': 18063, 'epoch': 3} {'type': 'loss', 'content': 0.04271604120731354, 'timestamp': '2025-09-30 22:34:30.384377', 'step': 18064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:30.417324', 'step': 18064, 'epoch': 3} {'type': 'loss', 'content': 0.07942253351211548, 'timestamp': '2025-09-30 22:34:30.424489', 'step': 18065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.470760', 'step': 18065, 'epoch': 3} {'type': 'loss', 'content': 0.10247983038425446, 'timestamp': '2025-09-30 22:34:30.483424', 'step': 18066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.530273', 'step': 18066, 'epoch': 3} {'type': 'loss', 'content': 0.0632866695523262, 'timestamp': '2025-09-30 22:34:30.535520', 'step': 18067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.568568', 'step': 18067, 'epoch': 3} {'type': 'loss', 'content': 0.06549304723739624, 'timestamp': '2025-09-30 22:34:30.593878', 'step': 18068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.625662', 'step': 18068, 'epoch': 3} {'type': 'loss', 'content': 0.053546275943517685, 'timestamp': '2025-09-30 22:34:30.640842', 'step': 18069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.675114', 'step': 18069, 'epoch': 3} {'type': 'loss', 'content': 0.05199041590094566, 'timestamp': '2025-09-30 22:34:30.678496', 'step': 18070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.712487', 'step': 18070, 'epoch': 3} {'type': 'loss', 'content': 0.05950646474957466, 'timestamp': '2025-09-30 22:34:30.725352', 'step': 18071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.768467', 'step': 18071, 'epoch': 3} {'type': 'loss', 'content': 0.047349587082862854, 'timestamp': '2025-09-30 22:34:30.793546', 'step': 18072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.827852', 'step': 18072, 'epoch': 3} {'type': 'loss', 'content': 0.07185792177915573, 'timestamp': '2025-09-30 22:34:30.839471', 'step': 18073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.873362', 'step': 18073, 'epoch': 3} {'type': 'loss', 'content': 0.06390530616044998, 'timestamp': '2025-09-30 22:34:30.877385', 'step': 18074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:30.912126', 'step': 18074, 'epoch': 3} {'type': 'loss', 'content': 0.09436176717281342, 'timestamp': '2025-09-30 22:34:30.924698', 'step': 18075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:30.964332', 'step': 18075, 'epoch': 3} {'type': 'loss', 'content': 0.07116285711526871, 'timestamp': '2025-09-30 22:34:30.996732', 'step': 18076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:31.034539', 'step': 18076, 'epoch': 3} {'type': 'loss', 'content': 0.08982734382152557, 'timestamp': '2025-09-30 22:34:31.038664', 'step': 18077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.078535', 'step': 18077, 'epoch': 3} {'type': 'loss', 'content': 0.06339077651500702, 'timestamp': '2025-09-30 22:34:31.093864', 'step': 18078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:31.133738', 'step': 18078, 'epoch': 3} {'type': 'loss', 'content': 0.05139711871743202, 'timestamp': '2025-09-30 22:34:31.137132', 'step': 18079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.169890', 'step': 18079, 'epoch': 3} {'type': 'loss', 'content': 0.08552373200654984, 'timestamp': '2025-09-30 22:34:31.195359', 'step': 18080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:31.241424', 'step': 18080, 'epoch': 3} {'type': 'loss', 'content': 0.097694531083107, 'timestamp': '2025-09-30 22:34:31.246005', 'step': 18081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.278609', 'step': 18081, 'epoch': 3} {'type': 'loss', 'content': 0.06907328218221664, 'timestamp': '2025-09-30 22:34:31.289700', 'step': 18082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:31.323705', 'step': 18082, 'epoch': 3} {'type': 'loss', 'content': 0.08666221797466278, 'timestamp': '2025-09-30 22:34:31.329165', 'step': 18083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:31.361539', 'step': 18083, 'epoch': 3} {'type': 'loss', 'content': 0.07403538376092911, 'timestamp': '2025-09-30 22:34:31.389773', 'step': 18084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:31.428501', 'step': 18084, 'epoch': 3} {'type': 'loss', 'content': 0.058576375246047974, 'timestamp': '2025-09-30 22:34:31.432009', 'step': 18085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:31.469527', 'step': 18085, 'epoch': 3} {'type': 'loss', 'content': 0.06374607980251312, 'timestamp': '2025-09-30 22:34:31.486796', 'step': 18086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.529141', 'step': 18086, 'epoch': 3} {'type': 'loss', 'content': 0.1678290069103241, 'timestamp': '2025-09-30 22:34:31.535578', 'step': 18087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:31.570538', 'step': 18087, 'epoch': 3} {'type': 'loss', 'content': 0.15372636914253235, 'timestamp': '2025-09-30 22:34:31.597716', 'step': 18088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:31.633413', 'step': 18088, 'epoch': 3} {'type': 'loss', 'content': 0.052702490240335464, 'timestamp': '2025-09-30 22:34:31.636586', 'step': 18089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.671224', 'step': 18089, 'epoch': 3} {'type': 'loss', 'content': 0.04449499770998955, 'timestamp': '2025-09-30 22:34:31.678349', 'step': 18090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.711720', 'step': 18090, 'epoch': 3} {'type': 'loss', 'content': 0.038176532834768295, 'timestamp': '2025-09-30 22:34:31.724021', 'step': 18091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:31.759214', 'step': 18091, 'epoch': 3} {'type': 'loss', 'content': 0.08893583714962006, 'timestamp': '2025-09-30 22:34:31.789724', 'step': 18092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.824736', 'step': 18092, 'epoch': 3} {'type': 'loss', 'content': 0.09072393923997879, 'timestamp': '2025-09-30 22:34:31.831837', 'step': 18093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:31.864273', 'step': 18093, 'epoch': 3} {'type': 'loss', 'content': 0.09059347957372665, 'timestamp': '2025-09-30 22:34:31.877291', 'step': 18094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:31.916948', 'step': 18094, 'epoch': 3} {'type': 'loss', 'content': 0.0920061245560646, 'timestamp': '2025-09-30 22:34:31.932644', 'step': 18095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:31.976778', 'step': 18095, 'epoch': 3} {'type': 'loss', 'content': 0.04681064561009407, 'timestamp': '2025-09-30 22:34:32.012124', 'step': 18096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.045829', 'step': 18096, 'epoch': 3} {'type': 'loss', 'content': 0.04128190129995346, 'timestamp': '2025-09-30 22:34:32.049333', 'step': 18097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.084513', 'step': 18097, 'epoch': 3} {'type': 'loss', 'content': 0.0693180039525032, 'timestamp': '2025-09-30 22:34:32.089056', 'step': 18098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.120584', 'step': 18098, 'epoch': 3} {'type': 'loss', 'content': 0.0504533015191555, 'timestamp': '2025-09-30 22:34:32.131998', 'step': 18099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.173507', 'step': 18099, 'epoch': 3} {'type': 'loss', 'content': 0.05061369761824608, 'timestamp': '2025-09-30 22:34:32.205561', 'step': 18100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:32.238172', 'step': 18100, 'epoch': 3} {'type': 'loss', 'content': 0.05778416991233826, 'timestamp': '2025-09-30 22:34:32.243345', 'step': 18101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.277510', 'step': 18101, 'epoch': 3} {'type': 'loss', 'content': 0.020773064345121384, 'timestamp': '2025-09-30 22:34:32.281929', 'step': 18102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.314555', 'step': 18102, 'epoch': 3} {'type': 'loss', 'content': 0.0322832353413105, 'timestamp': '2025-09-30 22:34:32.318357', 'step': 18103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.350518', 'step': 18103, 'epoch': 3} {'type': 'loss', 'content': 0.052731502801179886, 'timestamp': '2025-09-30 22:34:32.384467', 'step': 18104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:32.427167', 'step': 18104, 'epoch': 3} {'type': 'loss', 'content': 0.07893253862857819, 'timestamp': '2025-09-30 22:34:32.430180', 'step': 18105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.462053', 'step': 18105, 'epoch': 3} {'type': 'loss', 'content': 0.04131355136632919, 'timestamp': '2025-09-30 22:34:32.465924', 'step': 18106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.509377', 'step': 18106, 'epoch': 3} {'type': 'loss', 'content': 0.06916128844022751, 'timestamp': '2025-09-30 22:34:32.514258', 'step': 18107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:32.547417', 'step': 18107, 'epoch': 3} {'type': 'loss', 'content': 0.0773853063583374, 'timestamp': '2025-09-30 22:34:32.574047', 'step': 18108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.607579', 'step': 18108, 'epoch': 3} {'type': 'loss', 'content': 0.034423161298036575, 'timestamp': '2025-09-30 22:34:32.611148', 'step': 18109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.649121', 'step': 18109, 'epoch': 3} {'type': 'loss', 'content': 0.01697036437690258, 'timestamp': '2025-09-30 22:34:32.653481', 'step': 18110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.688958', 'step': 18110, 'epoch': 3} {'type': 'loss', 'content': 0.044152382761240005, 'timestamp': '2025-09-30 22:34:32.693513', 'step': 18111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:32.729369', 'step': 18111, 'epoch': 3} {'type': 'loss', 'content': 0.011983238160610199, 'timestamp': '2025-09-30 22:34:32.754418', 'step': 18112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:32.789068', 'step': 18112, 'epoch': 3} {'type': 'loss', 'content': 0.05094793438911438, 'timestamp': '2025-09-30 22:34:32.794883', 'step': 18113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:32.829053', 'step': 18113, 'epoch': 3} {'type': 'loss', 'content': 0.08169858902692795, 'timestamp': '2025-09-30 22:34:32.832947', 'step': 18114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.870512', 'step': 18114, 'epoch': 3} {'type': 'loss', 'content': 0.05249115824699402, 'timestamp': '2025-09-30 22:34:32.888262', 'step': 18115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.921533', 'step': 18115, 'epoch': 3} {'type': 'loss', 'content': 0.06669795513153076, 'timestamp': '2025-09-30 22:34:32.946798', 'step': 18116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:32.993386', 'step': 18116, 'epoch': 3} {'type': 'loss', 'content': 0.04402865469455719, 'timestamp': '2025-09-30 22:34:32.997433', 'step': 18117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:33.037758', 'step': 18117, 'epoch': 3} {'type': 'loss', 'content': 0.04414417967200279, 'timestamp': '2025-09-30 22:34:33.043087', 'step': 18118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.076364', 'step': 18118, 'epoch': 3} {'type': 'loss', 'content': 0.06649547815322876, 'timestamp': '2025-09-30 22:34:33.081705', 'step': 18119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.114827', 'step': 18119, 'epoch': 3} {'type': 'loss', 'content': 0.030527163296937943, 'timestamp': '2025-09-30 22:34:33.143031', 'step': 18120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.177994', 'step': 18120, 'epoch': 3} {'type': 'loss', 'content': 0.04375910386443138, 'timestamp': '2025-09-30 22:34:33.201548', 'step': 18121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.243043', 'step': 18121, 'epoch': 3} {'type': 'loss', 'content': 0.0791725441813469, 'timestamp': '2025-09-30 22:34:33.246496', 'step': 18122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.289333', 'step': 18122, 'epoch': 3} {'type': 'loss', 'content': 0.11233911663293839, 'timestamp': '2025-09-30 22:34:33.293066', 'step': 18123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.328911', 'step': 18123, 'epoch': 3} {'type': 'loss', 'content': 0.11121825128793716, 'timestamp': '2025-09-30 22:34:33.368079', 'step': 18124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.407346', 'step': 18124, 'epoch': 3} {'type': 'loss', 'content': 0.02569347620010376, 'timestamp': '2025-09-30 22:34:33.410589', 'step': 18125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.443489', 'step': 18125, 'epoch': 3} {'type': 'loss', 'content': 0.06601175665855408, 'timestamp': '2025-09-30 22:34:33.448568', 'step': 18126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:33.482308', 'step': 18126, 'epoch': 3} {'type': 'loss', 'content': 0.12256590276956558, 'timestamp': '2025-09-30 22:34:33.485507', 'step': 18127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:33.525097', 'step': 18127, 'epoch': 3} {'type': 'loss', 'content': 0.05828908830881119, 'timestamp': '2025-09-30 22:34:33.556585', 'step': 18128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.594441', 'step': 18128, 'epoch': 3} {'type': 'loss', 'content': 0.03226058557629585, 'timestamp': '2025-09-30 22:34:33.608520', 'step': 18129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.646484', 'step': 18129, 'epoch': 3} {'type': 'loss', 'content': 0.11340887099504471, 'timestamp': '2025-09-30 22:34:33.653112', 'step': 18130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.686919', 'step': 18130, 'epoch': 3} {'type': 'loss', 'content': 0.09004683792591095, 'timestamp': '2025-09-30 22:34:33.691682', 'step': 18131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:33.726699', 'step': 18131, 'epoch': 3} {'type': 'loss', 'content': 0.028371065855026245, 'timestamp': '2025-09-30 22:34:33.751935', 'step': 18132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.783451', 'step': 18132, 'epoch': 3} {'type': 'loss', 'content': 0.09015455842018127, 'timestamp': '2025-09-30 22:34:33.794212', 'step': 18133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.826900', 'step': 18133, 'epoch': 3} {'type': 'loss', 'content': 0.04087287560105324, 'timestamp': '2025-09-30 22:34:33.838408', 'step': 18134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:33.876311', 'step': 18134, 'epoch': 3} {'type': 'loss', 'content': 0.10875551402568817, 'timestamp': '2025-09-30 22:34:33.881463', 'step': 18135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.914155', 'step': 18135, 'epoch': 3} {'type': 'loss', 'content': 0.0890272930264473, 'timestamp': '2025-09-30 22:34:33.949428', 'step': 18136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:33.990468', 'step': 18136, 'epoch': 3} {'type': 'loss', 'content': 0.12616240978240967, 'timestamp': '2025-09-30 22:34:33.993871', 'step': 18137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:34.026594', 'step': 18137, 'epoch': 3} {'type': 'loss', 'content': 0.05426241457462311, 'timestamp': '2025-09-30 22:34:34.029809', 'step': 18138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:34.068347', 'step': 18138, 'epoch': 3} {'type': 'loss', 'content': 0.09373223781585693, 'timestamp': '2025-09-30 22:34:34.071207', 'step': 18139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:34.107338', 'step': 18139, 'epoch': 3} {'type': 'loss', 'content': 0.05367192253470421, 'timestamp': '2025-09-30 22:34:34.132568', 'step': 18140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:34.173771', 'step': 18140, 'epoch': 3} {'type': 'loss', 'content': 0.08173816651105881, 'timestamp': '2025-09-30 22:34:34.176454', 'step': 18141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:34.207447', 'step': 18141, 'epoch': 3} {'type': 'loss', 'content': 0.10583034157752991, 'timestamp': '2025-09-30 22:34:34.219128', 'step': 18142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:34.259770', 'step': 18142, 'epoch': 3} {'type': 'loss', 'content': 0.07939951121807098, 'timestamp': '2025-09-30 22:34:34.273397', 'step': 18143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:34.318075', 'step': 18143, 'epoch': 3} {'type': 'loss', 'content': 0.09168428927659988, 'timestamp': '2025-09-30 22:34:34.352031', 'step': 18144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:34.383678', 'step': 18144, 'epoch': 3} {'type': 'loss', 'content': 0.045788783580064774, 'timestamp': '2025-09-30 22:34:34.387278', 'step': 18145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:34.424782', 'step': 18145, 'epoch': 3} {'type': 'loss', 'content': 0.04693365469574928, 'timestamp': '2025-09-30 22:34:34.428227', 'step': 18146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:34.459783', 'step': 18146, 'epoch': 3} {'type': 'loss', 'content': 0.11245997250080109, 'timestamp': '2025-09-30 22:34:34.464901', 'step': 18147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:34.498861', 'step': 18147, 'epoch': 3} {'type': 'loss', 'content': 0.10888271033763885, 'timestamp': '2025-09-30 22:34:34.523446', 'step': 18148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:34.566736', 'step': 18148, 'epoch': 3} {'type': 'loss', 'content': 0.045403625816106796, 'timestamp': '2025-09-30 22:34:34.572729', 'step': 18149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:34.606845', 'step': 18149, 'epoch': 3} {'type': 'loss', 'content': 0.059119176119565964, 'timestamp': '2025-09-30 22:34:34.611546', 'step': 18150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:34.643821', 'step': 18150, 'epoch': 3} {'type': 'loss', 'content': 0.06576882302761078, 'timestamp': '2025-09-30 22:34:34.647908', 'step': 18151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:34.680768', 'step': 18151, 'epoch': 3} {'type': 'loss', 'content': 0.03568235784769058, 'timestamp': '2025-09-30 22:34:34.708184', 'step': 18152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:34.749335', 'step': 18152, 'epoch': 3} {'type': 'loss', 'content': 0.03014066070318222, 'timestamp': '2025-09-30 22:34:34.754029', 'step': 18153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:34.791276', 'step': 18153, 'epoch': 3} {'type': 'loss', 'content': 0.06443624198436737, 'timestamp': '2025-09-30 22:34:34.806585', 'step': 18154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:34.838770', 'step': 18154, 'epoch': 3} {'type': 'loss', 'content': 0.18241773545742035, 'timestamp': '2025-09-30 22:34:34.841139', 'step': 18155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:34.878756', 'step': 18155, 'epoch': 3} {'type': 'loss', 'content': 0.021438030526041985, 'timestamp': '2025-09-30 22:34:34.904459', 'step': 18156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:34.937946', 'step': 18156, 'epoch': 3} {'type': 'loss', 'content': 0.09471039474010468, 'timestamp': '2025-09-30 22:34:34.941971', 'step': 18157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:34.974117', 'step': 18157, 'epoch': 3} {'type': 'loss', 'content': 0.10242227464914322, 'timestamp': '2025-09-30 22:34:34.978499', 'step': 18158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:35.011105', 'step': 18158, 'epoch': 3} {'type': 'loss', 'content': 0.08989682048559189, 'timestamp': '2025-09-30 22:34:35.020827', 'step': 18159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:35.052911', 'step': 18159, 'epoch': 3} {'type': 'loss', 'content': 0.061342641711235046, 'timestamp': '2025-09-30 22:34:35.087452', 'step': 18160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:35.120037', 'step': 18160, 'epoch': 3} {'type': 'loss', 'content': 0.03977299854159355, 'timestamp': '2025-09-30 22:34:35.127252', 'step': 18161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:35.159740', 'step': 18161, 'epoch': 3} {'type': 'loss', 'content': 0.10791975259780884, 'timestamp': '2025-09-30 22:34:35.164449', 'step': 18162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:35.204482', 'step': 18162, 'epoch': 3} {'type': 'loss', 'content': 0.012979893013834953, 'timestamp': '2025-09-30 22:34:35.209348', 'step': 18163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:35.257246', 'step': 18163, 'epoch': 3} {'type': 'loss', 'content': 0.11508994549512863, 'timestamp': '2025-09-30 22:34:35.294991', 'step': 18164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:35.329751', 'step': 18164, 'epoch': 3} {'type': 'loss', 'content': 0.07387402653694153, 'timestamp': '2025-09-30 22:34:35.334677', 'step': 18165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:35.367410', 'step': 18165, 'epoch': 3} {'type': 'loss', 'content': 0.05821947380900383, 'timestamp': '2025-09-30 22:34:35.371312', 'step': 18166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:35.403707', 'step': 18166, 'epoch': 3} {'type': 'loss', 'content': 0.026218902319669724, 'timestamp': '2025-09-30 22:34:35.408327', 'step': 18167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:35.442333', 'step': 18167, 'epoch': 3} {'type': 'loss', 'content': 0.15378296375274658, 'timestamp': '2025-09-30 22:34:35.468755', 'step': 18168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:35.500618', 'step': 18168, 'epoch': 3} {'type': 'loss', 'content': 0.1104116216301918, 'timestamp': '2025-09-30 22:34:35.515270', 'step': 18169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:35.554521', 'step': 18169, 'epoch': 3} {'type': 'loss', 'content': 0.03630552068352699, 'timestamp': '2025-09-30 22:34:35.559773', 'step': 18170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:35.594563', 'step': 18170, 'epoch': 3} {'type': 'loss', 'content': 0.06501337885856628, 'timestamp': '2025-09-30 22:34:35.608554', 'step': 18171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:35.646611', 'step': 18171, 'epoch': 3} {'type': 'loss', 'content': 0.07456912100315094, 'timestamp': '2025-09-30 22:34:35.676143', 'step': 18172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:35.709949', 'step': 18172, 'epoch': 3} {'type': 'loss', 'content': 0.03231661766767502, 'timestamp': '2025-09-30 22:34:35.713913', 'step': 18173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:35.752555', 'step': 18173, 'epoch': 3} {'type': 'loss', 'content': 0.08345401287078857, 'timestamp': '2025-09-30 22:34:35.756002', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:34:43.589845', 'step': 18174, 'epoch': 3} {'type': 'pplx', 'content': 13490.862097388708, 'timestamp': '2025-09-30 22:34:43.602516', 'step': 18174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:43.634548', 'step': 18174, 'epoch': 3} {'type': 'loss', 'content': 0.04874671250581741, 'timestamp': '2025-09-30 22:34:43.637725', 'step': 18175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:43.677837', 'step': 18175, 'epoch': 3} {'type': 'loss', 'content': 0.05998886004090309, 'timestamp': '2025-09-30 22:34:43.705194', 'step': 18176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:43.737064', 'step': 18176, 'epoch': 3} {'type': 'loss', 'content': 0.05831608176231384, 'timestamp': '2025-09-30 22:34:43.740126', 'step': 18177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:43.774992', 'step': 18177, 'epoch': 3} {'type': 'loss', 'content': 0.06339724361896515, 'timestamp': '2025-09-30 22:34:43.784229', 'step': 18178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:43.829510', 'step': 18178, 'epoch': 3} {'type': 'loss', 'content': 0.1001763567328453, 'timestamp': '2025-09-30 22:34:43.837269', 'step': 18179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:43.876013', 'step': 18179, 'epoch': 3} {'type': 'loss', 'content': 0.031371183693408966, 'timestamp': '2025-09-30 22:34:43.906359', 'step': 18180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:43.942003', 'step': 18180, 'epoch': 3} {'type': 'loss', 'content': 0.09792029857635498, 'timestamp': '2025-09-30 22:34:43.952030', 'step': 18181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:43.988318', 'step': 18181, 'epoch': 3} {'type': 'loss', 'content': 0.03778442367911339, 'timestamp': '2025-09-30 22:34:43.991407', 'step': 18182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.022281', 'step': 18182, 'epoch': 3} {'type': 'loss', 'content': 0.0545925535261631, 'timestamp': '2025-09-30 22:34:44.027653', 'step': 18183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:44.061610', 'step': 18183, 'epoch': 3} {'type': 'loss', 'content': 0.05729509890079498, 'timestamp': '2025-09-30 22:34:44.086813', 'step': 18184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:44.124775', 'step': 18184, 'epoch': 3} {'type': 'loss', 'content': 0.0550091527402401, 'timestamp': '2025-09-30 22:34:44.129084', 'step': 18185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.162404', 'step': 18185, 'epoch': 3} {'type': 'loss', 'content': 0.10156324505805969, 'timestamp': '2025-09-30 22:34:44.170020', 'step': 18186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.207251', 'step': 18186, 'epoch': 3} {'type': 'loss', 'content': 0.11867409944534302, 'timestamp': '2025-09-30 22:34:44.211998', 'step': 18187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.247934', 'step': 18187, 'epoch': 3} {'type': 'loss', 'content': 0.0825275406241417, 'timestamp': '2025-09-30 22:34:44.272894', 'step': 18188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.317803', 'step': 18188, 'epoch': 3} {'type': 'loss', 'content': 0.04719505086541176, 'timestamp': '2025-09-30 22:34:44.322394', 'step': 18189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.356621', 'step': 18189, 'epoch': 3} {'type': 'loss', 'content': 0.04132601246237755, 'timestamp': '2025-09-30 22:34:44.359470', 'step': 18190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.395487', 'step': 18190, 'epoch': 3} {'type': 'loss', 'content': 0.05330505222082138, 'timestamp': '2025-09-30 22:34:44.399721', 'step': 18191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.434182', 'step': 18191, 'epoch': 3} {'type': 'loss', 'content': 0.018704133108258247, 'timestamp': '2025-09-30 22:34:44.461975', 'step': 18192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.495965', 'step': 18192, 'epoch': 3} {'type': 'loss', 'content': 0.09662332385778427, 'timestamp': '2025-09-30 22:34:44.503798', 'step': 18193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:44.540833', 'step': 18193, 'epoch': 3} {'type': 'loss', 'content': 0.04958701133728027, 'timestamp': '2025-09-30 22:34:44.544637', 'step': 18194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:44.576743', 'step': 18194, 'epoch': 3} {'type': 'loss', 'content': 0.030122390016913414, 'timestamp': '2025-09-30 22:34:44.589329', 'step': 18195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.623423', 'step': 18195, 'epoch': 3} {'type': 'loss', 'content': 0.0800645500421524, 'timestamp': '2025-09-30 22:34:44.649940', 'step': 18196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:44.681489', 'step': 18196, 'epoch': 3} {'type': 'loss', 'content': 0.03432629629969597, 'timestamp': '2025-09-30 22:34:44.687237', 'step': 18197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.721338', 'step': 18197, 'epoch': 3} {'type': 'loss', 'content': 0.06660131365060806, 'timestamp': '2025-09-30 22:34:44.724099', 'step': 18198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.760595', 'step': 18198, 'epoch': 3} {'type': 'loss', 'content': 0.13727253675460815, 'timestamp': '2025-09-30 22:34:44.765530', 'step': 18199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:44.797558', 'step': 18199, 'epoch': 3} {'type': 'loss', 'content': 0.0715092197060585, 'timestamp': '2025-09-30 22:34:44.822896', 'step': 18200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:44.853948', 'step': 18200, 'epoch': 3} {'type': 'loss', 'content': 0.031975552439689636, 'timestamp': '2025-09-30 22:34:44.858507', 'step': 18201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:44.890327', 'step': 18201, 'epoch': 3} {'type': 'loss', 'content': 0.042585279792547226, 'timestamp': '2025-09-30 22:34:44.893622', 'step': 18202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:44.926920', 'step': 18202, 'epoch': 3} {'type': 'loss', 'content': 0.1342184692621231, 'timestamp': '2025-09-30 22:34:44.932556', 'step': 18203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:44.971162', 'step': 18203, 'epoch': 3} {'type': 'loss', 'content': 0.049681078642606735, 'timestamp': '2025-09-30 22:34:45.001463', 'step': 18204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:45.032956', 'step': 18204, 'epoch': 3} {'type': 'loss', 'content': 0.11842314898967743, 'timestamp': '2025-09-30 22:34:45.036341', 'step': 18205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.069563', 'step': 18205, 'epoch': 3} {'type': 'loss', 'content': 0.06795243918895721, 'timestamp': '2025-09-30 22:34:45.077564', 'step': 18206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:45.109009', 'step': 18206, 'epoch': 3} {'type': 'loss', 'content': 0.07549483329057693, 'timestamp': '2025-09-30 22:34:45.115061', 'step': 18207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.147696', 'step': 18207, 'epoch': 3} {'type': 'loss', 'content': 0.11824540793895721, 'timestamp': '2025-09-30 22:34:45.173858', 'step': 18208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:45.207779', 'step': 18208, 'epoch': 3} {'type': 'loss', 'content': 0.07813022285699844, 'timestamp': '2025-09-30 22:34:45.212537', 'step': 18209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.252319', 'step': 18209, 'epoch': 3} {'type': 'loss', 'content': 0.10297427326440811, 'timestamp': '2025-09-30 22:34:45.255019', 'step': 18210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.294270', 'step': 18210, 'epoch': 3} {'type': 'loss', 'content': 0.06963841617107391, 'timestamp': '2025-09-30 22:34:45.297367', 'step': 18211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:45.328858', 'step': 18211, 'epoch': 3} {'type': 'loss', 'content': 0.0023514435160905123, 'timestamp': '2025-09-30 22:34:45.354097', 'step': 18212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:45.385989', 'step': 18212, 'epoch': 3} {'type': 'loss', 'content': 0.10168766230344772, 'timestamp': '2025-09-30 22:34:45.396426', 'step': 18213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:45.427400', 'step': 18213, 'epoch': 3} {'type': 'loss', 'content': 0.08670400828123093, 'timestamp': '2025-09-30 22:34:45.440817', 'step': 18214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:45.473854', 'step': 18214, 'epoch': 3} {'type': 'loss', 'content': 0.0427713543176651, 'timestamp': '2025-09-30 22:34:45.479616', 'step': 18215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:45.513492', 'step': 18215, 'epoch': 3} {'type': 'loss', 'content': 0.0497187077999115, 'timestamp': '2025-09-30 22:34:45.539304', 'step': 18216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:45.572817', 'step': 18216, 'epoch': 3} {'type': 'loss', 'content': 0.04826099053025246, 'timestamp': '2025-09-30 22:34:45.580582', 'step': 18217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:45.611978', 'step': 18217, 'epoch': 3} {'type': 'loss', 'content': 0.0659414678812027, 'timestamp': '2025-09-30 22:34:45.616999', 'step': 18218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.648318', 'step': 18218, 'epoch': 3} {'type': 'loss', 'content': 0.009325847961008549, 'timestamp': '2025-09-30 22:34:45.652768', 'step': 18219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:45.684753', 'step': 18219, 'epoch': 3} {'type': 'loss', 'content': 0.10590209066867828, 'timestamp': '2025-09-30 22:34:45.714470', 'step': 18220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.750423', 'step': 18220, 'epoch': 3} {'type': 'loss', 'content': 0.0387897826731205, 'timestamp': '2025-09-30 22:34:45.753464', 'step': 18221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.788280', 'step': 18221, 'epoch': 3} {'type': 'loss', 'content': 0.12273122370243073, 'timestamp': '2025-09-30 22:34:45.790929', 'step': 18222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.827257', 'step': 18222, 'epoch': 3} {'type': 'loss', 'content': 0.06930379569530487, 'timestamp': '2025-09-30 22:34:45.830502', 'step': 18223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:45.861323', 'step': 18223, 'epoch': 3} {'type': 'loss', 'content': 0.05502648651599884, 'timestamp': '2025-09-30 22:34:45.888004', 'step': 18224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:45.920927', 'step': 18224, 'epoch': 3} {'type': 'loss', 'content': 0.05404229462146759, 'timestamp': '2025-09-30 22:34:45.924649', 'step': 18225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:45.959763', 'step': 18225, 'epoch': 3} {'type': 'loss', 'content': 0.02516498975455761, 'timestamp': '2025-09-30 22:34:45.962963', 'step': 18226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:45.996870', 'step': 18226, 'epoch': 3} {'type': 'loss', 'content': 0.051323823630809784, 'timestamp': '2025-09-30 22:34:45.999994', 'step': 18227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:46.036712', 'step': 18227, 'epoch': 3} {'type': 'loss', 'content': 0.06660208106040955, 'timestamp': '2025-09-30 22:34:46.069121', 'step': 18228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.105034', 'step': 18228, 'epoch': 3} {'type': 'loss', 'content': 0.08948849141597748, 'timestamp': '2025-09-30 22:34:46.107997', 'step': 18229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.138867', 'step': 18229, 'epoch': 3} {'type': 'loss', 'content': 0.03907440975308418, 'timestamp': '2025-09-30 22:34:46.142018', 'step': 18230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.173137', 'step': 18230, 'epoch': 3} {'type': 'loss', 'content': 0.12718431651592255, 'timestamp': '2025-09-30 22:34:46.180843', 'step': 18231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:46.213320', 'step': 18231, 'epoch': 3} {'type': 'loss', 'content': 0.02103772573173046, 'timestamp': '2025-09-30 22:34:46.238804', 'step': 18232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:46.269394', 'step': 18232, 'epoch': 3} {'type': 'loss', 'content': 0.03154906630516052, 'timestamp': '2025-09-30 22:34:46.272509', 'step': 18233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:46.303317', 'step': 18233, 'epoch': 3} {'type': 'loss', 'content': 0.06830448657274246, 'timestamp': '2025-09-30 22:34:46.306508', 'step': 18234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:46.338274', 'step': 18234, 'epoch': 3} {'type': 'loss', 'content': 0.07716982066631317, 'timestamp': '2025-09-30 22:34:46.341003', 'step': 18235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.372027', 'step': 18235, 'epoch': 3} {'type': 'loss', 'content': 0.03553827479481697, 'timestamp': '2025-09-30 22:34:46.402456', 'step': 18236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:46.437772', 'step': 18236, 'epoch': 3} {'type': 'loss', 'content': 0.03242909535765648, 'timestamp': '2025-09-30 22:34:46.441398', 'step': 18237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:46.471259', 'step': 18237, 'epoch': 3} {'type': 'loss', 'content': 0.0421697273850441, 'timestamp': '2025-09-30 22:34:46.476158', 'step': 18238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:46.510985', 'step': 18238, 'epoch': 3} {'type': 'loss', 'content': 0.07092773914337158, 'timestamp': '2025-09-30 22:34:46.515385', 'step': 18239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.549003', 'step': 18239, 'epoch': 3} {'type': 'loss', 'content': 0.027677062898874283, 'timestamp': '2025-09-30 22:34:46.573653', 'step': 18240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:46.606632', 'step': 18240, 'epoch': 3} {'type': 'loss', 'content': 0.06809481978416443, 'timestamp': '2025-09-30 22:34:46.615139', 'step': 18241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.648449', 'step': 18241, 'epoch': 3} {'type': 'loss', 'content': 0.041606735438108444, 'timestamp': '2025-09-30 22:34:46.652344', 'step': 18242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:46.687367', 'step': 18242, 'epoch': 3} {'type': 'loss', 'content': 0.026137877255678177, 'timestamp': '2025-09-30 22:34:46.697417', 'step': 18243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:46.728305', 'step': 18243, 'epoch': 3} {'type': 'loss', 'content': 0.05434572324156761, 'timestamp': '2025-09-30 22:34:46.754603', 'step': 18244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:46.794304', 'step': 18244, 'epoch': 3} {'type': 'loss', 'content': 0.12876465916633606, 'timestamp': '2025-09-30 22:34:46.799106', 'step': 18245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:46.832713', 'step': 18245, 'epoch': 3} {'type': 'loss', 'content': 0.03946410119533539, 'timestamp': '2025-09-30 22:34:46.837540', 'step': 18246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.869194', 'step': 18246, 'epoch': 3} {'type': 'loss', 'content': 0.07792777568101883, 'timestamp': '2025-09-30 22:34:46.872692', 'step': 18247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:46.905038', 'step': 18247, 'epoch': 3} {'type': 'loss', 'content': 0.13198436796665192, 'timestamp': '2025-09-30 22:34:46.930824', 'step': 18248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:46.961489', 'step': 18248, 'epoch': 3} {'type': 'loss', 'content': 0.07820656895637512, 'timestamp': '2025-09-30 22:34:46.968335', 'step': 18249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:46.999333', 'step': 18249, 'epoch': 3} {'type': 'loss', 'content': 0.04371469095349312, 'timestamp': '2025-09-30 22:34:47.002636', 'step': 18250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:47.036152', 'step': 18250, 'epoch': 3} {'type': 'loss', 'content': 0.021230582147836685, 'timestamp': '2025-09-30 22:34:47.054724', 'step': 18251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:47.095541', 'step': 18251, 'epoch': 3} {'type': 'loss', 'content': 0.038555387407541275, 'timestamp': '2025-09-30 22:34:47.121584', 'step': 18252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:47.154682', 'step': 18252, 'epoch': 3} {'type': 'loss', 'content': 0.0178188718855381, 'timestamp': '2025-09-30 22:34:47.166072', 'step': 18253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:47.204609', 'step': 18253, 'epoch': 3} {'type': 'loss', 'content': 0.07265282422304153, 'timestamp': '2025-09-30 22:34:47.208618', 'step': 18254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:47.239880', 'step': 18254, 'epoch': 3} {'type': 'loss', 'content': 0.008680264465510845, 'timestamp': '2025-09-30 22:34:47.243451', 'step': 18255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:47.275267', 'step': 18255, 'epoch': 3} {'type': 'loss', 'content': 0.11242268979549408, 'timestamp': '2025-09-30 22:34:47.301046', 'step': 18256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.332957', 'step': 18256, 'epoch': 3} {'type': 'loss', 'content': 0.04691224545240402, 'timestamp': '2025-09-30 22:34:47.338572', 'step': 18257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.370091', 'step': 18257, 'epoch': 3} {'type': 'loss', 'content': 0.057314854115247726, 'timestamp': '2025-09-30 22:34:47.373111', 'step': 18258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:47.406278', 'step': 18258, 'epoch': 3} {'type': 'loss', 'content': 0.11831549555063248, 'timestamp': '2025-09-30 22:34:47.411690', 'step': 18259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 432], 'flops': 12814563338304}, 'timestamp': '2025-09-30 22:34:47.453001', 'step': 18259, 'epoch': 3} {'type': 'loss', 'content': 0.038733433932065964, 'timestamp': '2025-09-30 22:34:47.490347', 'step': 18260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.522583', 'step': 18260, 'epoch': 3} {'type': 'loss', 'content': 0.0471339114010334, 'timestamp': '2025-09-30 22:34:47.526421', 'step': 18261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:47.556620', 'step': 18261, 'epoch': 3} {'type': 'loss', 'content': 0.1417866051197052, 'timestamp': '2025-09-30 22:34:47.570355', 'step': 18262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.602753', 'step': 18262, 'epoch': 3} {'type': 'loss', 'content': 0.09715128690004349, 'timestamp': '2025-09-30 22:34:47.607162', 'step': 18263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.640094', 'step': 18263, 'epoch': 3} {'type': 'loss', 'content': 0.11402668058872223, 'timestamp': '2025-09-30 22:34:47.665369', 'step': 18264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:47.705220', 'step': 18264, 'epoch': 3} {'type': 'loss', 'content': 0.05026932805776596, 'timestamp': '2025-09-30 22:34:47.710590', 'step': 18265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:47.752167', 'step': 18265, 'epoch': 3} {'type': 'loss', 'content': 0.07271978259086609, 'timestamp': '2025-09-30 22:34:47.765558', 'step': 18266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:47.797081', 'step': 18266, 'epoch': 3} {'type': 'loss', 'content': 0.03609304130077362, 'timestamp': '2025-09-30 22:34:47.802591', 'step': 18267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.840193', 'step': 18267, 'epoch': 3} {'type': 'loss', 'content': 0.0489949956536293, 'timestamp': '2025-09-30 22:34:47.873349', 'step': 18268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.915015', 'step': 18268, 'epoch': 3} {'type': 'loss', 'content': 0.10139638185501099, 'timestamp': '2025-09-30 22:34:47.919146', 'step': 18269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:47.951257', 'step': 18269, 'epoch': 3} {'type': 'loss', 'content': 0.03596404194831848, 'timestamp': '2025-09-30 22:34:47.955267', 'step': 18270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:47.999881', 'step': 18270, 'epoch': 3} {'type': 'loss', 'content': 0.07370282709598541, 'timestamp': '2025-09-30 22:34:48.004006', 'step': 18271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.038229', 'step': 18271, 'epoch': 3} {'type': 'loss', 'content': 0.04634566605091095, 'timestamp': '2025-09-30 22:34:48.064044', 'step': 18272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:48.095270', 'step': 18272, 'epoch': 3} {'type': 'loss', 'content': 0.051582351326942444, 'timestamp': '2025-09-30 22:34:48.102524', 'step': 18273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:48.134597', 'step': 18273, 'epoch': 3} {'type': 'loss', 'content': 0.04186704382300377, 'timestamp': '2025-09-30 22:34:48.140114', 'step': 18274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:48.174930', 'step': 18274, 'epoch': 3} {'type': 'loss', 'content': 0.10656381398439407, 'timestamp': '2025-09-30 22:34:48.183827', 'step': 18275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:48.218977', 'step': 18275, 'epoch': 3} {'type': 'loss', 'content': 0.14295805990695953, 'timestamp': '2025-09-30 22:34:48.244251', 'step': 18276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.281632', 'step': 18276, 'epoch': 3} {'type': 'loss', 'content': 0.06226928159594536, 'timestamp': '2025-09-30 22:34:48.285846', 'step': 18277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:48.333270', 'step': 18277, 'epoch': 3} {'type': 'loss', 'content': 0.03588036447763443, 'timestamp': '2025-09-30 22:34:48.340777', 'step': 18278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:48.372413', 'step': 18278, 'epoch': 3} {'type': 'loss', 'content': 0.06897161155939102, 'timestamp': '2025-09-30 22:34:48.377569', 'step': 18279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:48.418707', 'step': 18279, 'epoch': 3} {'type': 'loss', 'content': 0.09541758894920349, 'timestamp': '2025-09-30 22:34:48.455550', 'step': 18280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.494823', 'step': 18280, 'epoch': 3} {'type': 'loss', 'content': 0.12117468565702438, 'timestamp': '2025-09-30 22:34:48.499282', 'step': 18281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.539947', 'step': 18281, 'epoch': 3} {'type': 'loss', 'content': 0.04843823239207268, 'timestamp': '2025-09-30 22:34:48.551135', 'step': 18282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:48.582357', 'step': 18282, 'epoch': 3} {'type': 'loss', 'content': 0.008682411164045334, 'timestamp': '2025-09-30 22:34:48.590546', 'step': 18283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.625708', 'step': 18283, 'epoch': 3} {'type': 'loss', 'content': 0.06600552052259445, 'timestamp': '2025-09-30 22:34:48.650477', 'step': 18284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.682312', 'step': 18284, 'epoch': 3} {'type': 'loss', 'content': 0.14685603976249695, 'timestamp': '2025-09-30 22:34:48.690337', 'step': 18285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:48.722225', 'step': 18285, 'epoch': 3} {'type': 'loss', 'content': 0.022575490176677704, 'timestamp': '2025-09-30 22:34:48.726313', 'step': 18286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:48.767316', 'step': 18286, 'epoch': 3} {'type': 'loss', 'content': 0.09557326883077621, 'timestamp': '2025-09-30 22:34:48.770240', 'step': 18287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:48.809544', 'step': 18287, 'epoch': 3} {'type': 'loss', 'content': 0.040272925049066544, 'timestamp': '2025-09-30 22:34:48.839320', 'step': 18288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:48.880705', 'step': 18288, 'epoch': 3} {'type': 'loss', 'content': 0.07490332424640656, 'timestamp': '2025-09-30 22:34:48.886892', 'step': 18289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:48.921939', 'step': 18289, 'epoch': 3} {'type': 'loss', 'content': 0.054421354085206985, 'timestamp': '2025-09-30 22:34:48.926245', 'step': 18290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:48.958123', 'step': 18290, 'epoch': 3} {'type': 'loss', 'content': 0.08433104306459427, 'timestamp': '2025-09-30 22:34:48.968410', 'step': 18291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.002612', 'step': 18291, 'epoch': 3} {'type': 'loss', 'content': 0.06183591112494469, 'timestamp': '2025-09-30 22:34:49.027617', 'step': 18292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.064824', 'step': 18292, 'epoch': 3} {'type': 'loss', 'content': 0.074294313788414, 'timestamp': '2025-09-30 22:34:49.070646', 'step': 18293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:49.104871', 'step': 18293, 'epoch': 3} {'type': 'loss', 'content': 0.11214803904294968, 'timestamp': '2025-09-30 22:34:49.115825', 'step': 18294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.148550', 'step': 18294, 'epoch': 3} {'type': 'loss', 'content': 0.01434343308210373, 'timestamp': '2025-09-30 22:34:49.155603', 'step': 18295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.187696', 'step': 18295, 'epoch': 3} {'type': 'loss', 'content': 0.1490020900964737, 'timestamp': '2025-09-30 22:34:49.213405', 'step': 18296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:49.253543', 'step': 18296, 'epoch': 3} {'type': 'loss', 'content': 0.033726803958415985, 'timestamp': '2025-09-30 22:34:49.272269', 'step': 18297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.305057', 'step': 18297, 'epoch': 3} {'type': 'loss', 'content': 0.06766257435083389, 'timestamp': '2025-09-30 22:34:49.307290', 'step': 18298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.342141', 'step': 18298, 'epoch': 3} {'type': 'loss', 'content': 0.04297083616256714, 'timestamp': '2025-09-30 22:34:49.344894', 'step': 18299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:49.377571', 'step': 18299, 'epoch': 3} {'type': 'loss', 'content': 0.11492116749286652, 'timestamp': '2025-09-30 22:34:49.407698', 'step': 18300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:49.438872', 'step': 18300, 'epoch': 3} {'type': 'loss', 'content': 0.07741408795118332, 'timestamp': '2025-09-30 22:34:49.445438', 'step': 18301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.481571', 'step': 18301, 'epoch': 3} {'type': 'loss', 'content': 0.0822267159819603, 'timestamp': '2025-09-30 22:34:49.487505', 'step': 18302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.519496', 'step': 18302, 'epoch': 3} {'type': 'loss', 'content': 0.06697080284357071, 'timestamp': '2025-09-30 22:34:49.524375', 'step': 18303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.557646', 'step': 18303, 'epoch': 3} {'type': 'loss', 'content': 0.0388801209628582, 'timestamp': '2025-09-30 22:34:49.585625', 'step': 18304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.618003', 'step': 18304, 'epoch': 3} {'type': 'loss', 'content': 0.07127205282449722, 'timestamp': '2025-09-30 22:34:49.622453', 'step': 18305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.664349', 'step': 18305, 'epoch': 3} {'type': 'loss', 'content': 0.10321365296840668, 'timestamp': '2025-09-30 22:34:49.669380', 'step': 18306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.703544', 'step': 18306, 'epoch': 3} {'type': 'loss', 'content': 0.06726552546024323, 'timestamp': '2025-09-30 22:34:49.711569', 'step': 18307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.746663', 'step': 18307, 'epoch': 3} {'type': 'loss', 'content': 0.028729116544127464, 'timestamp': '2025-09-30 22:34:49.771972', 'step': 18308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.803630', 'step': 18308, 'epoch': 3} {'type': 'loss', 'content': 0.06652098149061203, 'timestamp': '2025-09-30 22:34:49.807239', 'step': 18309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.844784', 'step': 18309, 'epoch': 3} {'type': 'loss', 'content': 0.07672334462404251, 'timestamp': '2025-09-30 22:34:49.847757', 'step': 18310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:49.880567', 'step': 18310, 'epoch': 3} {'type': 'loss', 'content': 0.09458279609680176, 'timestamp': '2025-09-30 22:34:49.894520', 'step': 18311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:49.926839', 'step': 18311, 'epoch': 3} {'type': 'loss', 'content': 0.08783405274152756, 'timestamp': '2025-09-30 22:34:49.952820', 'step': 18312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:49.984031', 'step': 18312, 'epoch': 3} {'type': 'loss', 'content': 0.053665805608034134, 'timestamp': '2025-09-30 22:34:49.986652', 'step': 18313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:50.018714', 'step': 18313, 'epoch': 3} {'type': 'loss', 'content': 0.053282231092453, 'timestamp': '2025-09-30 22:34:50.023930', 'step': 18314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.060623', 'step': 18314, 'epoch': 3} {'type': 'loss', 'content': 0.10993938893079758, 'timestamp': '2025-09-30 22:34:50.069402', 'step': 18315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.101540', 'step': 18315, 'epoch': 3} {'type': 'loss', 'content': 0.08147274702787399, 'timestamp': '2025-09-30 22:34:50.128846', 'step': 18316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:50.160977', 'step': 18316, 'epoch': 3} {'type': 'loss', 'content': 0.10591576993465424, 'timestamp': '2025-09-30 22:34:50.163831', 'step': 18317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.194696', 'step': 18317, 'epoch': 3} {'type': 'loss', 'content': 0.08668959140777588, 'timestamp': '2025-09-30 22:34:50.204537', 'step': 18318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.239848', 'step': 18318, 'epoch': 3} {'type': 'loss', 'content': 0.11213425546884537, 'timestamp': '2025-09-30 22:34:50.244535', 'step': 18319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.283593', 'step': 18319, 'epoch': 3} {'type': 'loss', 'content': 0.03197348490357399, 'timestamp': '2025-09-30 22:34:50.307931', 'step': 18320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.338164', 'step': 18320, 'epoch': 3} {'type': 'loss', 'content': 0.08267421275377274, 'timestamp': '2025-09-30 22:34:50.346611', 'step': 18321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:50.376552', 'step': 18321, 'epoch': 3} {'type': 'loss', 'content': 0.08704307675361633, 'timestamp': '2025-09-30 22:34:50.381327', 'step': 18322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:50.414538', 'step': 18322, 'epoch': 3} {'type': 'loss', 'content': 0.11921724677085876, 'timestamp': '2025-09-30 22:34:50.428202', 'step': 18323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:50.461497', 'step': 18323, 'epoch': 3} {'type': 'loss', 'content': 0.08050278574228287, 'timestamp': '2025-09-30 22:34:50.486542', 'step': 18324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.522097', 'step': 18324, 'epoch': 3} {'type': 'loss', 'content': 0.04843025282025337, 'timestamp': '2025-09-30 22:34:50.525743', 'step': 18325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:50.558801', 'step': 18325, 'epoch': 3} {'type': 'loss', 'content': 0.06546874344348907, 'timestamp': '2025-09-30 22:34:50.566585', 'step': 18326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:50.603277', 'step': 18326, 'epoch': 3} {'type': 'loss', 'content': 0.08620106428861618, 'timestamp': '2025-09-30 22:34:50.607060', 'step': 18327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:50.639277', 'step': 18327, 'epoch': 3} {'type': 'loss', 'content': 0.0969925969839096, 'timestamp': '2025-09-30 22:34:50.670340', 'step': 18328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:50.701534', 'step': 18328, 'epoch': 3} {'type': 'loss', 'content': 0.02782304771244526, 'timestamp': '2025-09-30 22:34:50.713051', 'step': 18329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:50.745808', 'step': 18329, 'epoch': 3} {'type': 'loss', 'content': 0.03209441900253296, 'timestamp': '2025-09-30 22:34:50.754932', 'step': 18330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:50.789405', 'step': 18330, 'epoch': 3} {'type': 'loss', 'content': 0.034995149821043015, 'timestamp': '2025-09-30 22:34:50.792177', 'step': 18331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:50.825987', 'step': 18331, 'epoch': 3} {'type': 'loss', 'content': 0.09840787947177887, 'timestamp': '2025-09-30 22:34:50.855994', 'step': 18332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.887752', 'step': 18332, 'epoch': 3} {'type': 'loss', 'content': 0.09802784770727158, 'timestamp': '2025-09-30 22:34:50.892448', 'step': 18333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:50.926475', 'step': 18333, 'epoch': 3} {'type': 'loss', 'content': 0.02915494330227375, 'timestamp': '2025-09-30 22:34:50.929789', 'step': 18334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:50.960694', 'step': 18334, 'epoch': 3} {'type': 'loss', 'content': 0.0871925801038742, 'timestamp': '2025-09-30 22:34:50.964683', 'step': 18335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.007774', 'step': 18335, 'epoch': 3} {'type': 'loss', 'content': 0.08922386169433594, 'timestamp': '2025-09-30 22:34:51.032900', 'step': 18336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.065328', 'step': 18336, 'epoch': 3} {'type': 'loss', 'content': 0.009455885738134384, 'timestamp': '2025-09-30 22:34:51.071479', 'step': 18337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:51.107268', 'step': 18337, 'epoch': 3} {'type': 'loss', 'content': 0.040860336273908615, 'timestamp': '2025-09-30 22:34:51.116136', 'step': 18338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:51.151980', 'step': 18338, 'epoch': 3} {'type': 'loss', 'content': 0.04040928930044174, 'timestamp': '2025-09-30 22:34:51.155424', 'step': 18339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:51.187027', 'step': 18339, 'epoch': 3} {'type': 'loss', 'content': 0.022812742739915848, 'timestamp': '2025-09-30 22:34:51.217721', 'step': 18340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.251092', 'step': 18340, 'epoch': 3} {'type': 'loss', 'content': 0.03560062497854233, 'timestamp': '2025-09-30 22:34:51.254400', 'step': 18341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.286049', 'step': 18341, 'epoch': 3} {'type': 'loss', 'content': 0.0516839437186718, 'timestamp': '2025-09-30 22:34:51.288528', 'step': 18342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:51.319528', 'step': 18342, 'epoch': 3} {'type': 'loss', 'content': 0.10255344957113266, 'timestamp': '2025-09-30 22:34:51.324643', 'step': 18343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:51.358680', 'step': 18343, 'epoch': 3} {'type': 'loss', 'content': 0.06329146027565002, 'timestamp': '2025-09-30 22:34:51.384942', 'step': 18344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.417017', 'step': 18344, 'epoch': 3} {'type': 'loss', 'content': 0.04400651901960373, 'timestamp': '2025-09-30 22:34:51.419494', 'step': 18345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.452491', 'step': 18345, 'epoch': 3} {'type': 'loss', 'content': 0.031823474913835526, 'timestamp': '2025-09-30 22:34:51.460566', 'step': 18346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:51.495265', 'step': 18346, 'epoch': 3} {'type': 'loss', 'content': 0.1525941789150238, 'timestamp': '2025-09-30 22:34:51.498149', 'step': 18347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:51.538964', 'step': 18347, 'epoch': 3} {'type': 'loss', 'content': 0.14530041813850403, 'timestamp': '2025-09-30 22:34:51.564557', 'step': 18348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:51.596059', 'step': 18348, 'epoch': 3} {'type': 'loss', 'content': 0.035918861627578735, 'timestamp': '2025-09-30 22:34:51.600220', 'step': 18349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:51.633222', 'step': 18349, 'epoch': 3} {'type': 'loss', 'content': 0.09303366392850876, 'timestamp': '2025-09-30 22:34:51.639958', 'step': 18350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.674321', 'step': 18350, 'epoch': 3} {'type': 'loss', 'content': 0.09049073606729507, 'timestamp': '2025-09-30 22:34:51.680525', 'step': 18351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.713997', 'step': 18351, 'epoch': 3} {'type': 'loss', 'content': 0.05066633224487305, 'timestamp': '2025-09-30 22:34:51.740386', 'step': 18352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.783963', 'step': 18352, 'epoch': 3} {'type': 'loss', 'content': 0.04434719681739807, 'timestamp': '2025-09-30 22:34:51.796532', 'step': 18353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.832977', 'step': 18353, 'epoch': 3} {'type': 'loss', 'content': 0.06178581342101097, 'timestamp': '2025-09-30 22:34:51.837397', 'step': 18354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.876291', 'step': 18354, 'epoch': 3} {'type': 'loss', 'content': 0.02901296131312847, 'timestamp': '2025-09-30 22:34:51.881615', 'step': 18355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:51.914156', 'step': 18355, 'epoch': 3} {'type': 'loss', 'content': 0.17851978540420532, 'timestamp': '2025-09-30 22:34:51.944201', 'step': 18356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:51.978773', 'step': 18356, 'epoch': 3} {'type': 'loss', 'content': 0.07149581611156464, 'timestamp': '2025-09-30 22:34:51.981299', 'step': 18357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.016207', 'step': 18357, 'epoch': 3} {'type': 'loss', 'content': 0.15030063688755035, 'timestamp': '2025-09-30 22:34:52.027284', 'step': 18358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:52.069512', 'step': 18358, 'epoch': 3} {'type': 'loss', 'content': 0.03785867989063263, 'timestamp': '2025-09-30 22:34:52.073350', 'step': 18359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:52.109199', 'step': 18359, 'epoch': 3} {'type': 'loss', 'content': 0.09241249412298203, 'timestamp': '2025-09-30 22:34:52.138307', 'step': 18360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:52.174945', 'step': 18360, 'epoch': 3} {'type': 'loss', 'content': 0.052797768265008926, 'timestamp': '2025-09-30 22:34:52.187331', 'step': 18361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:52.219404', 'step': 18361, 'epoch': 3} {'type': 'loss', 'content': 0.12331965565681458, 'timestamp': '2025-09-30 22:34:52.223342', 'step': 18362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.255620', 'step': 18362, 'epoch': 3} {'type': 'loss', 'content': 0.07601192593574524, 'timestamp': '2025-09-30 22:34:52.260255', 'step': 18363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:52.294056', 'step': 18363, 'epoch': 3} {'type': 'loss', 'content': 0.05004759505391121, 'timestamp': '2025-09-30 22:34:52.326024', 'step': 18364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:52.358389', 'step': 18364, 'epoch': 3} {'type': 'loss', 'content': 0.12754565477371216, 'timestamp': '2025-09-30 22:34:52.362480', 'step': 18365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:52.393420', 'step': 18365, 'epoch': 3} {'type': 'loss', 'content': 0.06305672973394394, 'timestamp': '2025-09-30 22:34:52.405220', 'step': 18366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.444942', 'step': 18366, 'epoch': 3} {'type': 'loss', 'content': 0.02644241601228714, 'timestamp': '2025-09-30 22:34:52.448099', 'step': 18367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.487669', 'step': 18367, 'epoch': 3} {'type': 'loss', 'content': 0.08871527016162872, 'timestamp': '2025-09-30 22:34:52.522302', 'step': 18368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:52.559870', 'step': 18368, 'epoch': 3} {'type': 'loss', 'content': 0.09269595146179199, 'timestamp': '2025-09-30 22:34:52.573324', 'step': 18369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:52.610786', 'step': 18369, 'epoch': 3} {'type': 'loss', 'content': 0.04122175648808479, 'timestamp': '2025-09-30 22:34:52.619797', 'step': 18370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:52.650828', 'step': 18370, 'epoch': 3} {'type': 'loss', 'content': 0.10326645523309708, 'timestamp': '2025-09-30 22:34:52.654986', 'step': 18371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:52.694859', 'step': 18371, 'epoch': 3} {'type': 'loss', 'content': 0.12754684686660767, 'timestamp': '2025-09-30 22:34:52.725609', 'step': 18372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:52.760543', 'step': 18372, 'epoch': 3} {'type': 'loss', 'content': 0.06001884490251541, 'timestamp': '2025-09-30 22:34:52.763920', 'step': 18373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:52.799227', 'step': 18373, 'epoch': 3} {'type': 'loss', 'content': 0.0779656395316124, 'timestamp': '2025-09-30 22:34:52.803464', 'step': 18374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.837979', 'step': 18374, 'epoch': 3} {'type': 'loss', 'content': 0.055019695311784744, 'timestamp': '2025-09-30 22:34:52.851290', 'step': 18375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:52.884122', 'step': 18375, 'epoch': 3} {'type': 'loss', 'content': 0.10021626949310303, 'timestamp': '2025-09-30 22:34:52.911685', 'step': 18376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.941703', 'step': 18376, 'epoch': 3} {'type': 'loss', 'content': 0.013231554999947548, 'timestamp': '2025-09-30 22:34:52.950219', 'step': 18377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:52.984853', 'step': 18377, 'epoch': 3} {'type': 'loss', 'content': 0.04311933368444443, 'timestamp': '2025-09-30 22:34:52.988790', 'step': 18378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.024101', 'step': 18378, 'epoch': 3} {'type': 'loss', 'content': 0.09838994592428207, 'timestamp': '2025-09-30 22:34:53.027603', 'step': 18379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:53.058830', 'step': 18379, 'epoch': 3} {'type': 'loss', 'content': 0.0811193510890007, 'timestamp': '2025-09-30 22:34:53.083340', 'step': 18380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.120316', 'step': 18380, 'epoch': 3} {'type': 'loss', 'content': 0.06328913569450378, 'timestamp': '2025-09-30 22:34:53.124210', 'step': 18381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.156839', 'step': 18381, 'epoch': 3} {'type': 'loss', 'content': 0.02172643318772316, 'timestamp': '2025-09-30 22:34:53.161940', 'step': 18382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.195088', 'step': 18382, 'epoch': 3} {'type': 'loss', 'content': 0.10056997090578079, 'timestamp': '2025-09-30 22:34:53.200225', 'step': 18383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.242684', 'step': 18383, 'epoch': 3} {'type': 'loss', 'content': 0.1271279901266098, 'timestamp': '2025-09-30 22:34:53.270357', 'step': 18384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.303575', 'step': 18384, 'epoch': 3} {'type': 'loss', 'content': 0.09232215583324432, 'timestamp': '2025-09-30 22:34:53.311998', 'step': 18385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.347029', 'step': 18385, 'epoch': 3} {'type': 'loss', 'content': 0.04483046755194664, 'timestamp': '2025-09-30 22:34:53.351512', 'step': 18386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.392472', 'step': 18386, 'epoch': 3} {'type': 'loss', 'content': 0.11068953573703766, 'timestamp': '2025-09-30 22:34:53.396913', 'step': 18387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.429477', 'step': 18387, 'epoch': 3} {'type': 'loss', 'content': 0.02888241969048977, 'timestamp': '2025-09-30 22:34:53.461494', 'step': 18388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.493937', 'step': 18388, 'epoch': 3} {'type': 'loss', 'content': 0.0598856545984745, 'timestamp': '2025-09-30 22:34:53.498241', 'step': 18389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.534889', 'step': 18389, 'epoch': 3} {'type': 'loss', 'content': 0.07096196711063385, 'timestamp': '2025-09-30 22:34:53.544497', 'step': 18390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:53.579372', 'step': 18390, 'epoch': 3} {'type': 'loss', 'content': 0.09680494666099548, 'timestamp': '2025-09-30 22:34:53.586279', 'step': 18391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.623927', 'step': 18391, 'epoch': 3} {'type': 'loss', 'content': 0.08153234422206879, 'timestamp': '2025-09-30 22:34:53.649894', 'step': 18392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:53.680791', 'step': 18392, 'epoch': 3} {'type': 'loss', 'content': 0.04874076694250107, 'timestamp': '2025-09-30 22:34:53.684475', 'step': 18393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:53.720288', 'step': 18393, 'epoch': 3} {'type': 'loss', 'content': 0.0633576288819313, 'timestamp': '2025-09-30 22:34:53.724733', 'step': 18394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:53.759432', 'step': 18394, 'epoch': 3} {'type': 'loss', 'content': 0.10390264540910721, 'timestamp': '2025-09-30 22:34:53.761950', 'step': 18395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.793355', 'step': 18395, 'epoch': 3} {'type': 'loss', 'content': 0.14003896713256836, 'timestamp': '2025-09-30 22:34:53.817542', 'step': 18396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.851492', 'step': 18396, 'epoch': 3} {'type': 'loss', 'content': 0.023342829197645187, 'timestamp': '2025-09-30 22:34:53.854648', 'step': 18397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:53.886102', 'step': 18397, 'epoch': 3} {'type': 'loss', 'content': 0.12850800156593323, 'timestamp': '2025-09-30 22:34:53.889432', 'step': 18398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.920361', 'step': 18398, 'epoch': 3} {'type': 'loss', 'content': 0.0400581918656826, 'timestamp': '2025-09-30 22:34:53.924795', 'step': 18399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:53.955696', 'step': 18399, 'epoch': 3} {'type': 'loss', 'content': 0.0888742059469223, 'timestamp': '2025-09-30 22:34:53.993920', 'step': 18400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:54.035828', 'step': 18400, 'epoch': 3} {'type': 'loss', 'content': 0.13734187185764313, 'timestamp': '2025-09-30 22:34:54.038823', 'step': 18401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:54.074881', 'step': 18401, 'epoch': 3} {'type': 'loss', 'content': 0.1532476246356964, 'timestamp': '2025-09-30 22:34:54.077259', 'step': 18402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:54.108188', 'step': 18402, 'epoch': 3} {'type': 'loss', 'content': 0.07430259883403778, 'timestamp': '2025-09-30 22:34:54.114182', 'step': 18403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:54.149752', 'step': 18403, 'epoch': 3} {'type': 'loss', 'content': 0.055024370551109314, 'timestamp': '2025-09-30 22:34:54.174445', 'step': 18404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.205523', 'step': 18404, 'epoch': 3} {'type': 'loss', 'content': 0.06480272859334946, 'timestamp': '2025-09-30 22:34:54.219858', 'step': 18405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.254313', 'step': 18405, 'epoch': 3} {'type': 'loss', 'content': 0.036467377096414566, 'timestamp': '2025-09-30 22:34:54.257635', 'step': 18406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:54.292164', 'step': 18406, 'epoch': 3} {'type': 'loss', 'content': 0.057270824909210205, 'timestamp': '2025-09-30 22:34:54.302781', 'step': 18407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:54.341261', 'step': 18407, 'epoch': 3} {'type': 'loss', 'content': 0.08136965334415436, 'timestamp': '2025-09-30 22:34:54.365753', 'step': 18408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.397099', 'step': 18408, 'epoch': 3} {'type': 'loss', 'content': 0.06220504269003868, 'timestamp': '2025-09-30 22:34:54.408028', 'step': 18409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:54.439997', 'step': 18409, 'epoch': 3} {'type': 'loss', 'content': 0.018065789714455605, 'timestamp': '2025-09-30 22:34:54.445117', 'step': 18410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:54.481063', 'step': 18410, 'epoch': 3} {'type': 'loss', 'content': 0.043373953551054, 'timestamp': '2025-09-30 22:34:54.491838', 'step': 18411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.530658', 'step': 18411, 'epoch': 3} {'type': 'loss', 'content': 0.05369817093014717, 'timestamp': '2025-09-30 22:34:54.556646', 'step': 18412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.587875', 'step': 18412, 'epoch': 3} {'type': 'loss', 'content': 0.021430728957057, 'timestamp': '2025-09-30 22:34:54.591454', 'step': 18413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.621102', 'step': 18413, 'epoch': 3} {'type': 'loss', 'content': 0.10392076522111893, 'timestamp': '2025-09-30 22:34:54.626052', 'step': 18414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:54.657773', 'step': 18414, 'epoch': 3} {'type': 'loss', 'content': 0.05921270698308945, 'timestamp': '2025-09-30 22:34:54.669150', 'step': 18415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:54.706521', 'step': 18415, 'epoch': 3} {'type': 'loss', 'content': 0.10844966769218445, 'timestamp': '2025-09-30 22:34:54.732678', 'step': 18416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:54.768097', 'step': 18416, 'epoch': 3} {'type': 'loss', 'content': 0.029102157801389694, 'timestamp': '2025-09-30 22:34:54.774696', 'step': 18417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.810636', 'step': 18417, 'epoch': 3} {'type': 'loss', 'content': 0.08400099724531174, 'timestamp': '2025-09-30 22:34:54.814004', 'step': 18418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.852482', 'step': 18418, 'epoch': 3} {'type': 'loss', 'content': 0.06548113375902176, 'timestamp': '2025-09-30 22:34:54.861469', 'step': 18419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:54.897421', 'step': 18419, 'epoch': 3} {'type': 'loss', 'content': 0.04775681346654892, 'timestamp': '2025-09-30 22:34:54.922575', 'step': 18420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:54.960040', 'step': 18420, 'epoch': 3} {'type': 'loss', 'content': 0.07093949615955353, 'timestamp': '2025-09-30 22:34:54.963878', 'step': 18421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:55.006417', 'step': 18421, 'epoch': 3} {'type': 'loss', 'content': 0.0740617960691452, 'timestamp': '2025-09-30 22:34:55.010375', 'step': 18422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:55.042579', 'step': 18422, 'epoch': 3} {'type': 'loss', 'content': 0.033607762306928635, 'timestamp': '2025-09-30 22:34:55.046840', 'step': 18423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:55.077517', 'step': 18423, 'epoch': 3} {'type': 'loss', 'content': 0.06654364615678787, 'timestamp': '2025-09-30 22:34:55.109693', 'step': 18424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:55.142535', 'step': 18424, 'epoch': 3} {'type': 'loss', 'content': 0.05551040545105934, 'timestamp': '2025-09-30 22:34:55.153423', 'step': 18425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:55.188989', 'step': 18425, 'epoch': 3} {'type': 'loss', 'content': 0.05067546293139458, 'timestamp': '2025-09-30 22:34:55.204748', 'step': 18426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.236056', 'step': 18426, 'epoch': 3} {'type': 'loss', 'content': 0.06709345430135727, 'timestamp': '2025-09-30 22:34:55.240396', 'step': 18427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:55.272400', 'step': 18427, 'epoch': 3} {'type': 'loss', 'content': 0.06641669571399689, 'timestamp': '2025-09-30 22:34:55.299021', 'step': 18428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:55.344154', 'step': 18428, 'epoch': 3} {'type': 'loss', 'content': 0.08656365424394608, 'timestamp': '2025-09-30 22:34:55.347155', 'step': 18429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.379818', 'step': 18429, 'epoch': 3} {'type': 'loss', 'content': 0.11263001710176468, 'timestamp': '2025-09-30 22:34:55.384307', 'step': 18430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.423200', 'step': 18430, 'epoch': 3} {'type': 'loss', 'content': 0.07339407503604889, 'timestamp': '2025-09-30 22:34:55.426712', 'step': 18431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:55.469155', 'step': 18431, 'epoch': 3} {'type': 'loss', 'content': 0.08085577934980392, 'timestamp': '2025-09-30 22:34:55.493997', 'step': 18432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:55.534347', 'step': 18432, 'epoch': 3} {'type': 'loss', 'content': 0.019874107092618942, 'timestamp': '2025-09-30 22:34:55.538960', 'step': 18433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:55.571526', 'step': 18433, 'epoch': 3} {'type': 'loss', 'content': 0.11270785331726074, 'timestamp': '2025-09-30 22:34:55.578491', 'step': 18434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.614436', 'step': 18434, 'epoch': 3} {'type': 'loss', 'content': 0.08696013689041138, 'timestamp': '2025-09-30 22:34:55.619070', 'step': 18435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.650859', 'step': 18435, 'epoch': 3} {'type': 'loss', 'content': 0.04657433182001114, 'timestamp': '2025-09-30 22:34:55.675781', 'step': 18436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.717454', 'step': 18436, 'epoch': 3} {'type': 'loss', 'content': 0.09874942898750305, 'timestamp': '2025-09-30 22:34:55.721296', 'step': 18437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:55.752876', 'step': 18437, 'epoch': 3} {'type': 'loss', 'content': 0.10062675178050995, 'timestamp': '2025-09-30 22:34:55.767222', 'step': 18438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:55.806323', 'step': 18438, 'epoch': 3} {'type': 'loss', 'content': 0.054759178310632706, 'timestamp': '2025-09-30 22:34:55.821381', 'step': 18439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:55.858037', 'step': 18439, 'epoch': 3} {'type': 'loss', 'content': 0.09337261319160461, 'timestamp': '2025-09-30 22:34:55.885748', 'step': 18440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:55.920890', 'step': 18440, 'epoch': 3} {'type': 'loss', 'content': 0.0638592317700386, 'timestamp': '2025-09-30 22:34:55.926459', 'step': 18441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:55.967937', 'step': 18441, 'epoch': 3} {'type': 'loss', 'content': 0.061568815261125565, 'timestamp': '2025-09-30 22:34:55.980399', 'step': 18442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:56.024261', 'step': 18442, 'epoch': 3} {'type': 'loss', 'content': 0.11587432026863098, 'timestamp': '2025-09-30 22:34:56.028912', 'step': 18443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:56.061018', 'step': 18443, 'epoch': 3} {'type': 'loss', 'content': 0.04239929839968681, 'timestamp': '2025-09-30 22:34:56.088899', 'step': 18444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:34:56.129163', 'step': 18444, 'epoch': 3} {'type': 'loss', 'content': 0.05616822466254234, 'timestamp': '2025-09-30 22:34:56.140814', 'step': 18445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:56.182547', 'step': 18445, 'epoch': 3} {'type': 'loss', 'content': 0.05618611350655556, 'timestamp': '2025-09-30 22:34:56.187938', 'step': 18446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:56.224886', 'step': 18446, 'epoch': 3} {'type': 'loss', 'content': 0.06581911444664001, 'timestamp': '2025-09-30 22:34:56.229215', 'step': 18447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:56.282891', 'step': 18447, 'epoch': 3} {'type': 'loss', 'content': 0.016350170597434044, 'timestamp': '2025-09-30 22:34:56.309751', 'step': 18448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:56.342287', 'step': 18448, 'epoch': 3} {'type': 'loss', 'content': 0.05698664113879204, 'timestamp': '2025-09-30 22:34:56.346511', 'step': 18449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:56.387771', 'step': 18449, 'epoch': 3} {'type': 'loss', 'content': 0.08130533248186111, 'timestamp': '2025-09-30 22:34:56.391368', 'step': 18450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:56.432854', 'step': 18450, 'epoch': 3} {'type': 'loss', 'content': 0.03228338435292244, 'timestamp': '2025-09-30 22:34:56.442717', 'step': 18451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:56.476197', 'step': 18451, 'epoch': 3} {'type': 'loss', 'content': 0.03149693086743355, 'timestamp': '2025-09-30 22:34:56.504433', 'step': 18452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:56.549793', 'step': 18452, 'epoch': 3} {'type': 'loss', 'content': 0.08884710818529129, 'timestamp': '2025-09-30 22:34:56.561019', 'step': 18453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:56.600121', 'step': 18453, 'epoch': 3} {'type': 'loss', 'content': 0.07139912992715836, 'timestamp': '2025-09-30 22:34:56.604840', 'step': 18454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:56.637047', 'step': 18454, 'epoch': 3} {'type': 'loss', 'content': 0.048348117619752884, 'timestamp': '2025-09-30 22:34:56.641563', 'step': 18455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:56.673402', 'step': 18455, 'epoch': 3} {'type': 'loss', 'content': 0.07311306893825531, 'timestamp': '2025-09-30 22:34:56.704206', 'step': 18456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:56.739597', 'step': 18456, 'epoch': 3} {'type': 'loss', 'content': 0.05011998862028122, 'timestamp': '2025-09-30 22:34:56.747591', 'step': 18457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:56.790282', 'step': 18457, 'epoch': 3} {'type': 'loss', 'content': 0.02934250421822071, 'timestamp': '2025-09-30 22:34:56.794380', 'step': 18458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:56.835671', 'step': 18458, 'epoch': 3} {'type': 'loss', 'content': 0.09718111157417297, 'timestamp': '2025-09-30 22:34:56.849795', 'step': 18459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:56.889641', 'step': 18459, 'epoch': 3} {'type': 'loss', 'content': 0.1108943372964859, 'timestamp': '2025-09-30 22:34:56.927209', 'step': 18460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:56.970066', 'step': 18460, 'epoch': 3} {'type': 'loss', 'content': 0.0776548758149147, 'timestamp': '2025-09-30 22:34:56.976516', 'step': 18461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:57.017821', 'step': 18461, 'epoch': 3} {'type': 'loss', 'content': 0.07922207564115524, 'timestamp': '2025-09-30 22:34:57.024101', 'step': 18462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:57.057197', 'step': 18462, 'epoch': 3} {'type': 'loss', 'content': 0.10349275171756744, 'timestamp': '2025-09-30 22:34:57.070637', 'step': 18463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.105949', 'step': 18463, 'epoch': 3} {'type': 'loss', 'content': 0.021319083869457245, 'timestamp': '2025-09-30 22:34:57.138031', 'step': 18464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.176495', 'step': 18464, 'epoch': 3} {'type': 'loss', 'content': 0.04589712619781494, 'timestamp': '2025-09-30 22:34:57.182286', 'step': 18465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.226173', 'step': 18465, 'epoch': 3} {'type': 'loss', 'content': 0.10651341825723648, 'timestamp': '2025-09-30 22:34:57.235407', 'step': 18466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:34:57.270948', 'step': 18466, 'epoch': 3} {'type': 'loss', 'content': 0.05949695408344269, 'timestamp': '2025-09-30 22:34:57.276573', 'step': 18467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:57.311099', 'step': 18467, 'epoch': 3} {'type': 'loss', 'content': 0.041574496775865555, 'timestamp': '2025-09-30 22:34:57.344888', 'step': 18468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:57.382522', 'step': 18468, 'epoch': 3} {'type': 'loss', 'content': 0.022567685693502426, 'timestamp': '2025-09-30 22:34:57.387187', 'step': 18469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.420762', 'step': 18469, 'epoch': 3} {'type': 'loss', 'content': 0.06662081927061081, 'timestamp': '2025-09-30 22:34:57.427394', 'step': 18470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:57.461341', 'step': 18470, 'epoch': 3} {'type': 'loss', 'content': 0.05436071753501892, 'timestamp': '2025-09-30 22:34:57.478558', 'step': 18471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:57.521485', 'step': 18471, 'epoch': 3} {'type': 'loss', 'content': 0.037260886281728745, 'timestamp': '2025-09-30 22:34:57.548292', 'step': 18472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.592475', 'step': 18472, 'epoch': 3} {'type': 'loss', 'content': 0.0424186997115612, 'timestamp': '2025-09-30 22:34:57.596433', 'step': 18473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:57.628568', 'step': 18473, 'epoch': 3} {'type': 'loss', 'content': 0.08686142414808273, 'timestamp': '2025-09-30 22:34:57.632001', 'step': 18474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:57.667965', 'step': 18474, 'epoch': 3} {'type': 'loss', 'content': 0.04393938556313515, 'timestamp': '2025-09-30 22:34:57.672797', 'step': 18475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:57.706453', 'step': 18475, 'epoch': 3} {'type': 'loss', 'content': 0.016469648107886314, 'timestamp': '2025-09-30 22:34:57.733304', 'step': 18476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:57.776683', 'step': 18476, 'epoch': 3} {'type': 'loss', 'content': 0.0686124637722969, 'timestamp': '2025-09-30 22:34:57.783039', 'step': 18477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.825529', 'step': 18477, 'epoch': 3} {'type': 'loss', 'content': 0.07336746901273727, 'timestamp': '2025-09-30 22:34:57.829957', 'step': 18478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.874484', 'step': 18478, 'epoch': 3} {'type': 'loss', 'content': 0.1234232559800148, 'timestamp': '2025-09-30 22:34:57.879475', 'step': 18479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:57.923359', 'step': 18479, 'epoch': 3} {'type': 'loss', 'content': 0.012507792562246323, 'timestamp': '2025-09-30 22:34:57.951279', 'step': 18480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:57.998052', 'step': 18480, 'epoch': 3} {'type': 'loss', 'content': 0.09458368271589279, 'timestamp': '2025-09-30 22:34:58.003451', 'step': 18481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:58.036761', 'step': 18481, 'epoch': 3} {'type': 'loss', 'content': 0.08960486203432083, 'timestamp': '2025-09-30 22:34:58.039873', 'step': 18482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:58.080179', 'step': 18482, 'epoch': 3} {'type': 'loss', 'content': 0.04892108961939812, 'timestamp': '2025-09-30 22:34:58.086613', 'step': 18483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:58.119425', 'step': 18483, 'epoch': 3} {'type': 'loss', 'content': 0.059043776243925095, 'timestamp': '2025-09-30 22:34:58.145958', 'step': 18484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:58.178670', 'step': 18484, 'epoch': 3} {'type': 'loss', 'content': 0.02180333249270916, 'timestamp': '2025-09-30 22:34:58.184879', 'step': 18485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:58.217466', 'step': 18485, 'epoch': 3} {'type': 'loss', 'content': 0.13327805697917938, 'timestamp': '2025-09-30 22:34:58.235591', 'step': 18486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:58.267154', 'step': 18486, 'epoch': 3} {'type': 'loss', 'content': 0.03252382576465607, 'timestamp': '2025-09-30 22:34:58.272503', 'step': 18487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:58.304539', 'step': 18487, 'epoch': 3} {'type': 'loss', 'content': 0.05251915007829666, 'timestamp': '2025-09-30 22:34:58.341117', 'step': 18488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:58.381310', 'step': 18488, 'epoch': 3} {'type': 'loss', 'content': 0.035199109464883804, 'timestamp': '2025-09-30 22:34:58.394617', 'step': 18489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:58.438165', 'step': 18489, 'epoch': 3} {'type': 'loss', 'content': 0.06803499907255173, 'timestamp': '2025-09-30 22:34:58.443807', 'step': 18490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:34:58.478225', 'step': 18490, 'epoch': 3} {'type': 'loss', 'content': 0.09840696305036545, 'timestamp': '2025-09-30 22:34:58.484557', 'step': 18491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:58.517791', 'step': 18491, 'epoch': 3} {'type': 'loss', 'content': 0.10559175908565521, 'timestamp': '2025-09-30 22:34:58.543821', 'step': 18492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:34:58.587291', 'step': 18492, 'epoch': 3} {'type': 'loss', 'content': 0.06301002204418182, 'timestamp': '2025-09-30 22:34:58.601923', 'step': 18493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:34:58.635988', 'step': 18493, 'epoch': 3} {'type': 'loss', 'content': 0.0982993021607399, 'timestamp': '2025-09-30 22:34:58.648946', 'step': 18494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:34:58.681991', 'step': 18494, 'epoch': 3} {'type': 'loss', 'content': 0.044994745403528214, 'timestamp': '2025-09-30 22:34:58.687958', 'step': 18495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:58.730703', 'step': 18495, 'epoch': 3} {'type': 'loss', 'content': 0.061169855296611786, 'timestamp': '2025-09-30 22:34:58.755404', 'step': 18496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:58.802181', 'step': 18496, 'epoch': 3} {'type': 'loss', 'content': 0.08243010193109512, 'timestamp': '2025-09-30 22:34:58.809118', 'step': 18497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:58.842575', 'step': 18497, 'epoch': 3} {'type': 'loss', 'content': 0.12460414320230484, 'timestamp': '2025-09-30 22:34:58.846736', 'step': 18498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:34:58.884209', 'step': 18498, 'epoch': 3} {'type': 'loss', 'content': 0.030070170760154724, 'timestamp': '2025-09-30 22:34:58.900441', 'step': 18499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:34:58.934820', 'step': 18499, 'epoch': 3} {'type': 'loss', 'content': 0.051121536642313004, 'timestamp': '2025-09-30 22:34:58.962048', 'step': 18500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 18500', 'timestamp': '2025-09-30 22:35:03.956105', 'step': 18500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.003601', 'step': 18500, 'epoch': 3} {'type': 'loss', 'content': 0.0983782708644867, 'timestamp': '2025-09-30 22:35:04.007925', 'step': 18501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:04.040325', 'step': 18501, 'epoch': 3} {'type': 'loss', 'content': 0.07226964086294174, 'timestamp': '2025-09-30 22:35:04.051113', 'step': 18502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:04.086848', 'step': 18502, 'epoch': 3} {'type': 'loss', 'content': 0.04335803911089897, 'timestamp': '2025-09-30 22:35:04.093671', 'step': 18503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:04.136981', 'step': 18503, 'epoch': 3} {'type': 'loss', 'content': 0.05898931249976158, 'timestamp': '2025-09-30 22:35:04.163280', 'step': 18504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:04.204083', 'step': 18504, 'epoch': 3} {'type': 'loss', 'content': 0.0931704044342041, 'timestamp': '2025-09-30 22:35:04.209319', 'step': 18505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.242495', 'step': 18505, 'epoch': 3} {'type': 'loss', 'content': 0.11836238950490952, 'timestamp': '2025-09-30 22:35:04.245033', 'step': 18506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:04.278032', 'step': 18506, 'epoch': 3} {'type': 'loss', 'content': 0.017395198345184326, 'timestamp': '2025-09-30 22:35:04.282017', 'step': 18507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:04.314344', 'step': 18507, 'epoch': 3} {'type': 'loss', 'content': 0.061863165348768234, 'timestamp': '2025-09-30 22:35:04.339831', 'step': 18508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.378357', 'step': 18508, 'epoch': 3} {'type': 'loss', 'content': 0.08074364066123962, 'timestamp': '2025-09-30 22:35:04.388590', 'step': 18509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.419960', 'step': 18509, 'epoch': 3} {'type': 'loss', 'content': 0.04262885823845863, 'timestamp': '2025-09-30 22:35:04.425351', 'step': 18510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.457516', 'step': 18510, 'epoch': 3} {'type': 'loss', 'content': 0.028416767716407776, 'timestamp': '2025-09-30 22:35:04.461007', 'step': 18511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:04.498793', 'step': 18511, 'epoch': 3} {'type': 'loss', 'content': 0.027023619040846825, 'timestamp': '2025-09-30 22:35:04.531728', 'step': 18512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:04.572841', 'step': 18512, 'epoch': 3} {'type': 'loss', 'content': 0.039152417331933975, 'timestamp': '2025-09-30 22:35:04.578837', 'step': 18513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.611320', 'step': 18513, 'epoch': 3} {'type': 'loss', 'content': 0.0955282673239708, 'timestamp': '2025-09-30 22:35:04.614377', 'step': 18514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:04.646149', 'step': 18514, 'epoch': 3} {'type': 'loss', 'content': 0.07588844001293182, 'timestamp': '2025-09-30 22:35:04.650806', 'step': 18515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:04.683895', 'step': 18515, 'epoch': 3} {'type': 'loss', 'content': 0.029613260179758072, 'timestamp': '2025-09-30 22:35:04.710178', 'step': 18516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:04.742269', 'step': 18516, 'epoch': 3} {'type': 'loss', 'content': 0.07957690954208374, 'timestamp': '2025-09-30 22:35:04.747238', 'step': 18517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:04.779323', 'step': 18517, 'epoch': 3} {'type': 'loss', 'content': 0.06982669979333878, 'timestamp': '2025-09-30 22:35:04.789140', 'step': 18518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.822809', 'step': 18518, 'epoch': 3} {'type': 'loss', 'content': 0.03144548460841179, 'timestamp': '2025-09-30 22:35:04.835927', 'step': 18519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:04.877342', 'step': 18519, 'epoch': 3} {'type': 'loss', 'content': 0.06317336857318878, 'timestamp': '2025-09-30 22:35:04.903909', 'step': 18520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.934406', 'step': 18520, 'epoch': 3} {'type': 'loss', 'content': 0.09423419088125229, 'timestamp': '2025-09-30 22:35:04.938134', 'step': 18521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:04.972139', 'step': 18521, 'epoch': 3} {'type': 'loss', 'content': 0.015612286515533924, 'timestamp': '2025-09-30 22:35:04.983410', 'step': 18522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:05.014867', 'step': 18522, 'epoch': 3} {'type': 'loss', 'content': 0.02376130409538746, 'timestamp': '2025-09-30 22:35:05.020953', 'step': 18523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:05.052927', 'step': 18523, 'epoch': 3} {'type': 'loss', 'content': 0.06124257668852806, 'timestamp': '2025-09-30 22:35:05.078293', 'step': 18524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.116849', 'step': 18524, 'epoch': 3} {'type': 'loss', 'content': 0.0671728178858757, 'timestamp': '2025-09-30 22:35:05.129213', 'step': 18525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:05.162792', 'step': 18525, 'epoch': 3} {'type': 'loss', 'content': 0.040440138429403305, 'timestamp': '2025-09-30 22:35:05.168002', 'step': 18526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.201134', 'step': 18526, 'epoch': 3} {'type': 'loss', 'content': 0.15362539887428284, 'timestamp': '2025-09-30 22:35:05.207078', 'step': 18527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.249803', 'step': 18527, 'epoch': 3} {'type': 'loss', 'content': 0.046572182327508926, 'timestamp': '2025-09-30 22:35:05.275305', 'step': 18528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.307786', 'step': 18528, 'epoch': 3} {'type': 'loss', 'content': 0.06944295018911362, 'timestamp': '2025-09-30 22:35:05.311540', 'step': 18529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:05.349026', 'step': 18529, 'epoch': 3} {'type': 'loss', 'content': 0.06586849689483643, 'timestamp': '2025-09-30 22:35:05.356540', 'step': 18530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.393160', 'step': 18530, 'epoch': 3} {'type': 'loss', 'content': 0.10682443529367447, 'timestamp': '2025-09-30 22:35:05.404036', 'step': 18531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:05.437650', 'step': 18531, 'epoch': 3} {'type': 'loss', 'content': 0.1400204449892044, 'timestamp': '2025-09-30 22:35:05.462753', 'step': 18532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:05.499793', 'step': 18532, 'epoch': 3} {'type': 'loss', 'content': 0.06740055978298187, 'timestamp': '2025-09-30 22:35:05.503808', 'step': 18533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.545285', 'step': 18533, 'epoch': 3} {'type': 'loss', 'content': 0.1371363252401352, 'timestamp': '2025-09-30 22:35:05.553798', 'step': 18534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:05.586856', 'step': 18534, 'epoch': 3} {'type': 'loss', 'content': 0.04901464283466339, 'timestamp': '2025-09-30 22:35:05.590094', 'step': 18535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:05.623368', 'step': 18535, 'epoch': 3} {'type': 'loss', 'content': 0.1341022551059723, 'timestamp': '2025-09-30 22:35:05.648736', 'step': 18536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:05.681382', 'step': 18536, 'epoch': 3} {'type': 'loss', 'content': 0.05868637561798096, 'timestamp': '2025-09-30 22:35:05.684233', 'step': 18537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:05.717475', 'step': 18537, 'epoch': 3} {'type': 'loss', 'content': 0.07987591624259949, 'timestamp': '2025-09-30 22:35:05.722285', 'step': 18538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.754797', 'step': 18538, 'epoch': 3} {'type': 'loss', 'content': 0.03170359134674072, 'timestamp': '2025-09-30 22:35:05.759124', 'step': 18539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:05.791092', 'step': 18539, 'epoch': 3} {'type': 'loss', 'content': 0.09935618937015533, 'timestamp': '2025-09-30 22:35:05.823361', 'step': 18540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:05.868728', 'step': 18540, 'epoch': 3} {'type': 'loss', 'content': 0.10042484849691391, 'timestamp': '2025-09-30 22:35:05.873196', 'step': 18541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:05.913494', 'step': 18541, 'epoch': 3} {'type': 'loss', 'content': 0.05469993129372597, 'timestamp': '2025-09-30 22:35:05.919747', 'step': 18542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:05.952167', 'step': 18542, 'epoch': 3} {'type': 'loss', 'content': 0.05097318813204765, 'timestamp': '2025-09-30 22:35:05.957057', 'step': 18543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:05.995805', 'step': 18543, 'epoch': 3} {'type': 'loss', 'content': 0.13102373480796814, 'timestamp': '2025-09-30 22:35:06.033099', 'step': 18544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:06.070017', 'step': 18544, 'epoch': 3} {'type': 'loss', 'content': 0.057223714888095856, 'timestamp': '2025-09-30 22:35:06.074293', 'step': 18545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:06.107659', 'step': 18545, 'epoch': 3} {'type': 'loss', 'content': 0.007276824675500393, 'timestamp': '2025-09-30 22:35:06.111093', 'step': 18546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:06.142623', 'step': 18546, 'epoch': 3} {'type': 'loss', 'content': 0.015353611670434475, 'timestamp': '2025-09-30 22:35:06.156074', 'step': 18547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:06.188160', 'step': 18547, 'epoch': 3} {'type': 'loss', 'content': 0.019341621547937393, 'timestamp': '2025-09-30 22:35:06.214139', 'step': 18548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:06.251002', 'step': 18548, 'epoch': 3} {'type': 'loss', 'content': 0.1092430055141449, 'timestamp': '2025-09-30 22:35:06.255354', 'step': 18549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:06.288379', 'step': 18549, 'epoch': 3} {'type': 'loss', 'content': 0.06862612068653107, 'timestamp': '2025-09-30 22:35:06.292684', 'step': 18550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:06.325250', 'step': 18550, 'epoch': 3} {'type': 'loss', 'content': 0.07797287404537201, 'timestamp': '2025-09-30 22:35:06.328952', 'step': 18551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:06.367983', 'step': 18551, 'epoch': 3} {'type': 'loss', 'content': 0.07930046319961548, 'timestamp': '2025-09-30 22:35:06.399736', 'step': 18552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:06.432566', 'step': 18552, 'epoch': 3} {'type': 'loss', 'content': 0.07986725866794586, 'timestamp': '2025-09-30 22:35:06.437293', 'step': 18553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:06.469263', 'step': 18553, 'epoch': 3} {'type': 'loss', 'content': 0.0848626047372818, 'timestamp': '2025-09-30 22:35:06.471654', 'step': 18554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:06.506535', 'step': 18554, 'epoch': 3} {'type': 'loss', 'content': 0.03217044472694397, 'timestamp': '2025-09-30 22:35:06.510244', 'step': 18555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:06.553840', 'step': 18555, 'epoch': 3} {'type': 'loss', 'content': 0.09910246729850769, 'timestamp': '2025-09-30 22:35:06.591374', 'step': 18556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:06.624313', 'step': 18556, 'epoch': 3} {'type': 'loss', 'content': 0.04168270155787468, 'timestamp': '2025-09-30 22:35:06.628098', 'step': 18557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:06.670965', 'step': 18557, 'epoch': 3} {'type': 'loss', 'content': 0.08043242245912552, 'timestamp': '2025-09-30 22:35:06.683226', 'step': 18558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:06.715966', 'step': 18558, 'epoch': 3} {'type': 'loss', 'content': 0.08093057572841644, 'timestamp': '2025-09-30 22:35:06.721044', 'step': 18559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:06.759156', 'step': 18559, 'epoch': 3} {'type': 'loss', 'content': 0.10082065314054489, 'timestamp': '2025-09-30 22:35:06.793782', 'step': 18560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:06.826906', 'step': 18560, 'epoch': 3} {'type': 'loss', 'content': 0.07162214815616608, 'timestamp': '2025-09-30 22:35:06.832093', 'step': 18561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:06.866226', 'step': 18561, 'epoch': 3} {'type': 'loss', 'content': 0.05352618545293808, 'timestamp': '2025-09-30 22:35:06.875861', 'step': 18562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:06.922036', 'step': 18562, 'epoch': 3} {'type': 'loss', 'content': 0.032791879028081894, 'timestamp': '2025-09-30 22:35:06.934979', 'step': 18563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:06.967604', 'step': 18563, 'epoch': 3} {'type': 'loss', 'content': 0.03783794119954109, 'timestamp': '2025-09-30 22:35:06.993162', 'step': 18564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:07.026448', 'step': 18564, 'epoch': 3} {'type': 'loss', 'content': 0.03936224803328514, 'timestamp': '2025-09-30 22:35:07.036323', 'step': 18565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:07.077761', 'step': 18565, 'epoch': 3} {'type': 'loss', 'content': 0.06373526155948639, 'timestamp': '2025-09-30 22:35:07.087887', 'step': 18566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:07.122315', 'step': 18566, 'epoch': 3} {'type': 'loss', 'content': 0.027203664183616638, 'timestamp': '2025-09-30 22:35:07.125961', 'step': 18567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:07.159412', 'step': 18567, 'epoch': 3} {'type': 'loss', 'content': 0.14611077308654785, 'timestamp': '2025-09-30 22:35:07.184443', 'step': 18568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:07.226294', 'step': 18568, 'epoch': 3} {'type': 'loss', 'content': 0.09271833300590515, 'timestamp': '2025-09-30 22:35:07.238775', 'step': 18569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:07.274088', 'step': 18569, 'epoch': 3} {'type': 'loss', 'content': 0.14207792282104492, 'timestamp': '2025-09-30 22:35:07.278857', 'step': 18570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.317556', 'step': 18570, 'epoch': 3} {'type': 'loss', 'content': 0.07877287268638611, 'timestamp': '2025-09-30 22:35:07.320838', 'step': 18571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 352], 'flops': 10441544708224}, 'timestamp': '2025-09-30 22:35:07.356342', 'step': 18571, 'epoch': 3} {'type': 'loss', 'content': 0.05497846007347107, 'timestamp': '2025-09-30 22:35:07.391201', 'step': 18572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:07.422170', 'step': 18572, 'epoch': 3} {'type': 'loss', 'content': 0.03843384608626366, 'timestamp': '2025-09-30 22:35:07.425861', 'step': 18573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.457148', 'step': 18573, 'epoch': 3} {'type': 'loss', 'content': 0.07796239107847214, 'timestamp': '2025-09-30 22:35:07.462096', 'step': 18574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.493659', 'step': 18574, 'epoch': 3} {'type': 'loss', 'content': 0.04924711957573891, 'timestamp': '2025-09-30 22:35:07.508287', 'step': 18575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.546080', 'step': 18575, 'epoch': 3} {'type': 'loss', 'content': 0.09920016676187515, 'timestamp': '2025-09-30 22:35:07.574504', 'step': 18576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:07.607180', 'step': 18576, 'epoch': 3} {'type': 'loss', 'content': 0.12835822999477386, 'timestamp': '2025-09-30 22:35:07.611887', 'step': 18577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.645537', 'step': 18577, 'epoch': 3} {'type': 'loss', 'content': 0.02935422956943512, 'timestamp': '2025-09-30 22:35:07.650030', 'step': 18578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:07.684231', 'step': 18578, 'epoch': 3} {'type': 'loss', 'content': 0.046846069395542145, 'timestamp': '2025-09-30 22:35:07.688373', 'step': 18579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:07.723045', 'step': 18579, 'epoch': 3} {'type': 'loss', 'content': 0.15058234333992004, 'timestamp': '2025-09-30 22:35:07.753189', 'step': 18580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.788618', 'step': 18580, 'epoch': 3} {'type': 'loss', 'content': 0.07075609266757965, 'timestamp': '2025-09-30 22:35:07.793294', 'step': 18581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:07.827108', 'step': 18581, 'epoch': 3} {'type': 'loss', 'content': 0.07058900594711304, 'timestamp': '2025-09-30 22:35:07.831220', 'step': 18582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.875294', 'step': 18582, 'epoch': 3} {'type': 'loss', 'content': 0.18013647198677063, 'timestamp': '2025-09-30 22:35:07.881573', 'step': 18583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:07.915778', 'step': 18583, 'epoch': 3} {'type': 'loss', 'content': 0.08483877032995224, 'timestamp': '2025-09-30 22:35:07.940713', 'step': 18584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:35:07.970886', 'step': 18584, 'epoch': 3} {'type': 'loss', 'content': 0.05757265165448189, 'timestamp': '2025-09-30 22:35:07.977714', 'step': 18585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.012734', 'step': 18585, 'epoch': 3} {'type': 'loss', 'content': 0.09801799058914185, 'timestamp': '2025-09-30 22:35:08.016240', 'step': 18586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.049932', 'step': 18586, 'epoch': 3} {'type': 'loss', 'content': 0.09885561466217041, 'timestamp': '2025-09-30 22:35:08.055670', 'step': 18587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:08.090909', 'step': 18587, 'epoch': 3} {'type': 'loss', 'content': 0.03754057735204697, 'timestamp': '2025-09-30 22:35:08.118837', 'step': 18588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:08.149938', 'step': 18588, 'epoch': 3} {'type': 'loss', 'content': 0.03409196436405182, 'timestamp': '2025-09-30 22:35:08.154501', 'step': 18589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.192748', 'step': 18589, 'epoch': 3} {'type': 'loss', 'content': 0.0662434846162796, 'timestamp': '2025-09-30 22:35:08.205729', 'step': 18590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:08.240992', 'step': 18590, 'epoch': 3} {'type': 'loss', 'content': 0.052142415195703506, 'timestamp': '2025-09-30 22:35:08.245960', 'step': 18591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:08.281572', 'step': 18591, 'epoch': 3} {'type': 'loss', 'content': 0.10702143609523773, 'timestamp': '2025-09-30 22:35:08.306363', 'step': 18592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.341318', 'step': 18592, 'epoch': 3} {'type': 'loss', 'content': 0.051903918385505676, 'timestamp': '2025-09-30 22:35:08.346872', 'step': 18593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.382400', 'step': 18593, 'epoch': 3} {'type': 'loss', 'content': 0.05838511884212494, 'timestamp': '2025-09-30 22:35:08.386466', 'step': 18594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.419809', 'step': 18594, 'epoch': 3} {'type': 'loss', 'content': 0.12811562418937683, 'timestamp': '2025-09-30 22:35:08.430577', 'step': 18595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.462438', 'step': 18595, 'epoch': 3} {'type': 'loss', 'content': 0.04306928068399429, 'timestamp': '2025-09-30 22:35:08.490350', 'step': 18596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.523634', 'step': 18596, 'epoch': 3} {'type': 'loss', 'content': 0.05799296498298645, 'timestamp': '2025-09-30 22:35:08.526992', 'step': 18597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:08.558399', 'step': 18597, 'epoch': 3} {'type': 'loss', 'content': 0.04256053641438484, 'timestamp': '2025-09-30 22:35:08.562395', 'step': 18598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.594182', 'step': 18598, 'epoch': 3} {'type': 'loss', 'content': 0.07064314931631088, 'timestamp': '2025-09-30 22:35:08.607337', 'step': 18599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.639891', 'step': 18599, 'epoch': 3} {'type': 'loss', 'content': 0.12379156053066254, 'timestamp': '2025-09-30 22:35:08.664681', 'step': 18600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:35:08.705155', 'step': 18600, 'epoch': 3} {'type': 'loss', 'content': 0.06725650280714035, 'timestamp': '2025-09-30 22:35:08.708043', 'step': 18601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.740078', 'step': 18601, 'epoch': 3} {'type': 'loss', 'content': 0.030412863940000534, 'timestamp': '2025-09-30 22:35:08.750950', 'step': 18602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.783321', 'step': 18602, 'epoch': 3} {'type': 'loss', 'content': 0.1335204839706421, 'timestamp': '2025-09-30 22:35:08.795966', 'step': 18603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.828074', 'step': 18603, 'epoch': 3} {'type': 'loss', 'content': 0.09832505881786346, 'timestamp': '2025-09-30 22:35:08.854229', 'step': 18604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:08.891065', 'step': 18604, 'epoch': 3} {'type': 'loss', 'content': 0.06110570579767227, 'timestamp': '2025-09-30 22:35:08.897812', 'step': 18605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:08.942140', 'step': 18605, 'epoch': 3} {'type': 'loss', 'content': 0.18743616342544556, 'timestamp': '2025-09-30 22:35:08.959532', 'step': 18606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:09.005164', 'step': 18606, 'epoch': 3} {'type': 'loss', 'content': 0.04424584284424782, 'timestamp': '2025-09-30 22:35:09.022230', 'step': 18607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.056265', 'step': 18607, 'epoch': 3} {'type': 'loss', 'content': 0.053129009902477264, 'timestamp': '2025-09-30 22:35:09.083543', 'step': 18608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:09.118161', 'step': 18608, 'epoch': 3} {'type': 'loss', 'content': 0.0914936363697052, 'timestamp': '2025-09-30 22:35:09.122755', 'step': 18609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:09.156797', 'step': 18609, 'epoch': 3} {'type': 'loss', 'content': 0.0991736501455307, 'timestamp': '2025-09-30 22:35:09.161114', 'step': 18610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:09.201804', 'step': 18610, 'epoch': 3} {'type': 'loss', 'content': 0.04799516126513481, 'timestamp': '2025-09-30 22:35:09.207217', 'step': 18611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:09.252819', 'step': 18611, 'epoch': 3} {'type': 'loss', 'content': 0.06957731395959854, 'timestamp': '2025-09-30 22:35:09.278606', 'step': 18612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:09.312366', 'step': 18612, 'epoch': 3} {'type': 'loss', 'content': 0.058709025382995605, 'timestamp': '2025-09-30 22:35:09.318291', 'step': 18613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.351891', 'step': 18613, 'epoch': 3} {'type': 'loss', 'content': 0.13405874371528625, 'timestamp': '2025-09-30 22:35:09.371361', 'step': 18614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.404590', 'step': 18614, 'epoch': 3} {'type': 'loss', 'content': 0.18146485090255737, 'timestamp': '2025-09-30 22:35:09.407766', 'step': 18615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.440727', 'step': 18615, 'epoch': 3} {'type': 'loss', 'content': 0.01906209997832775, 'timestamp': '2025-09-30 22:35:09.469365', 'step': 18616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:35:09.502140', 'step': 18616, 'epoch': 3} {'type': 'loss', 'content': 0.016603846102952957, 'timestamp': '2025-09-30 22:35:09.512783', 'step': 18617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.545984', 'step': 18617, 'epoch': 3} {'type': 'loss', 'content': 0.0783422663807869, 'timestamp': '2025-09-30 22:35:09.550237', 'step': 18618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.584636', 'step': 18618, 'epoch': 3} {'type': 'loss', 'content': 0.05583041533827782, 'timestamp': '2025-09-30 22:35:09.593132', 'step': 18619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:09.629557', 'step': 18619, 'epoch': 3} {'type': 'loss', 'content': 0.04747536778450012, 'timestamp': '2025-09-30 22:35:09.655621', 'step': 18620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:09.688028', 'step': 18620, 'epoch': 3} {'type': 'loss', 'content': 0.09730032086372375, 'timestamp': '2025-09-30 22:35:09.698890', 'step': 18621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.745012', 'step': 18621, 'epoch': 3} {'type': 'loss', 'content': 0.11425688862800598, 'timestamp': '2025-09-30 22:35:09.761543', 'step': 18622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:09.820680', 'step': 18622, 'epoch': 3} {'type': 'loss', 'content': 0.06920571625232697, 'timestamp': '2025-09-30 22:35:09.826318', 'step': 18623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:09.866676', 'step': 18623, 'epoch': 3} {'type': 'loss', 'content': 0.08261705935001373, 'timestamp': '2025-09-30 22:35:09.891605', 'step': 18624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:09.933459', 'step': 18624, 'epoch': 3} {'type': 'loss', 'content': 0.11181632429361343, 'timestamp': '2025-09-30 22:35:09.936618', 'step': 18625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:09.975521', 'step': 18625, 'epoch': 3} {'type': 'loss', 'content': 0.0830930694937706, 'timestamp': '2025-09-30 22:35:09.981512', 'step': 18626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.023116', 'step': 18626, 'epoch': 3} {'type': 'loss', 'content': 0.10302462428808212, 'timestamp': '2025-09-30 22:35:10.027027', 'step': 18627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.075796', 'step': 18627, 'epoch': 3} {'type': 'loss', 'content': 0.1234905868768692, 'timestamp': '2025-09-30 22:35:10.111304', 'step': 18628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:10.160495', 'step': 18628, 'epoch': 3} {'type': 'loss', 'content': 0.08114293217658997, 'timestamp': '2025-09-30 22:35:10.163948', 'step': 18629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:10.221961', 'step': 18629, 'epoch': 3} {'type': 'loss', 'content': 0.024119310081005096, 'timestamp': '2025-09-30 22:35:10.229328', 'step': 18630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:10.267833', 'step': 18630, 'epoch': 3} {'type': 'loss', 'content': 0.06694834679365158, 'timestamp': '2025-09-30 22:35:10.275357', 'step': 18631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:10.317575', 'step': 18631, 'epoch': 3} {'type': 'loss', 'content': 0.058880239725112915, 'timestamp': '2025-09-30 22:35:10.341533', 'step': 18632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.383352', 'step': 18632, 'epoch': 3} {'type': 'loss', 'content': 0.08390079438686371, 'timestamp': '2025-09-30 22:35:10.387431', 'step': 18633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.441851', 'step': 18633, 'epoch': 3} {'type': 'loss', 'content': 0.13712501525878906, 'timestamp': '2025-09-30 22:35:10.444980', 'step': 18634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:10.485554', 'step': 18634, 'epoch': 3} {'type': 'loss', 'content': 0.11112203449010849, 'timestamp': '2025-09-30 22:35:10.489834', 'step': 18635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.533222', 'step': 18635, 'epoch': 3} {'type': 'loss', 'content': 0.10797778517007828, 'timestamp': '2025-09-30 22:35:10.567109', 'step': 18636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.601875', 'step': 18636, 'epoch': 3} {'type': 'loss', 'content': 0.03916693478822708, 'timestamp': '2025-09-30 22:35:10.607634', 'step': 18637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:10.642570', 'step': 18637, 'epoch': 3} {'type': 'loss', 'content': 0.04668442904949188, 'timestamp': '2025-09-30 22:35:10.653003', 'step': 18638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:10.697178', 'step': 18638, 'epoch': 3} {'type': 'loss', 'content': 0.09943177551031113, 'timestamp': '2025-09-30 22:35:10.699912', 'step': 18639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:10.759934', 'step': 18639, 'epoch': 3} {'type': 'loss', 'content': 0.06258808821439743, 'timestamp': '2025-09-30 22:35:10.787144', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:35:18.456696', 'step': 18640, 'epoch': 3} {'type': 'pplx', 'content': 15837.226247975088, 'timestamp': '2025-09-30 22:35:18.463029', 'step': 18640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:18.496253', 'step': 18640, 'epoch': 3} {'type': 'loss', 'content': 0.10980916023254395, 'timestamp': '2025-09-30 22:35:18.504239', 'step': 18641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.543638', 'step': 18641, 'epoch': 3} {'type': 'loss', 'content': 0.0235125869512558, 'timestamp': '2025-09-30 22:35:18.547333', 'step': 18642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.579806', 'step': 18642, 'epoch': 3} {'type': 'loss', 'content': 0.1120070368051529, 'timestamp': '2025-09-30 22:35:18.582933', 'step': 18643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.614792', 'step': 18643, 'epoch': 3} {'type': 'loss', 'content': 0.07156652957201004, 'timestamp': '2025-09-30 22:35:18.639662', 'step': 18644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.669685', 'step': 18644, 'epoch': 3} {'type': 'loss', 'content': 0.10278602689504623, 'timestamp': '2025-09-30 22:35:18.677453', 'step': 18645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:18.712494', 'step': 18645, 'epoch': 3} {'type': 'loss', 'content': 0.10303576290607452, 'timestamp': '2025-09-30 22:35:18.720165', 'step': 18646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.756006', 'step': 18646, 'epoch': 3} {'type': 'loss', 'content': 0.09242856502532959, 'timestamp': '2025-09-30 22:35:18.758831', 'step': 18647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:18.796090', 'step': 18647, 'epoch': 3} {'type': 'loss', 'content': 0.05749311298131943, 'timestamp': '2025-09-30 22:35:18.822296', 'step': 18648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:18.854976', 'step': 18648, 'epoch': 3} {'type': 'loss', 'content': 0.025715656578540802, 'timestamp': '2025-09-30 22:35:18.858467', 'step': 18649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:18.889664', 'step': 18649, 'epoch': 3} {'type': 'loss', 'content': 0.10126648843288422, 'timestamp': '2025-09-30 22:35:18.895004', 'step': 18650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.924782', 'step': 18650, 'epoch': 3} {'type': 'loss', 'content': 0.0712675228714943, 'timestamp': '2025-09-30 22:35:18.938520', 'step': 18651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:18.973185', 'step': 18651, 'epoch': 3} {'type': 'loss', 'content': 0.1507093757390976, 'timestamp': '2025-09-30 22:35:19.000936', 'step': 18652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.033790', 'step': 18652, 'epoch': 3} {'type': 'loss', 'content': 0.12638789415359497, 'timestamp': '2025-09-30 22:35:19.043133', 'step': 18653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.078704', 'step': 18653, 'epoch': 3} {'type': 'loss', 'content': 0.0601157620549202, 'timestamp': '2025-09-30 22:35:19.081775', 'step': 18654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:19.118513', 'step': 18654, 'epoch': 3} {'type': 'loss', 'content': 0.028711190447211266, 'timestamp': '2025-09-30 22:35:19.122267', 'step': 18655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.161336', 'step': 18655, 'epoch': 3} {'type': 'loss', 'content': 0.07904496043920517, 'timestamp': '2025-09-30 22:35:19.187362', 'step': 18656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.219266', 'step': 18656, 'epoch': 3} {'type': 'loss', 'content': 0.060721855610609055, 'timestamp': '2025-09-30 22:35:19.222126', 'step': 18657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:19.261501', 'step': 18657, 'epoch': 3} {'type': 'loss', 'content': 0.007261989638209343, 'timestamp': '2025-09-30 22:35:19.264126', 'step': 18658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:19.293890', 'step': 18658, 'epoch': 3} {'type': 'loss', 'content': 0.06821910291910172, 'timestamp': '2025-09-30 22:35:19.303496', 'step': 18659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.336504', 'step': 18659, 'epoch': 3} {'type': 'loss', 'content': 0.07715118676424026, 'timestamp': '2025-09-30 22:35:19.361699', 'step': 18660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.396460', 'step': 18660, 'epoch': 3} {'type': 'loss', 'content': 0.1303168386220932, 'timestamp': '2025-09-30 22:35:19.399935', 'step': 18661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.432155', 'step': 18661, 'epoch': 3} {'type': 'loss', 'content': 0.051494844257831573, 'timestamp': '2025-09-30 22:35:19.435089', 'step': 18662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:19.468064', 'step': 18662, 'epoch': 3} {'type': 'loss', 'content': 0.13703911006450653, 'timestamp': '2025-09-30 22:35:19.471622', 'step': 18663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.505299', 'step': 18663, 'epoch': 3} {'type': 'loss', 'content': 0.12187477201223373, 'timestamp': '2025-09-30 22:35:19.532529', 'step': 18664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:19.570045', 'step': 18664, 'epoch': 3} {'type': 'loss', 'content': 0.0755772814154625, 'timestamp': '2025-09-30 22:35:19.577350', 'step': 18665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.609934', 'step': 18665, 'epoch': 3} {'type': 'loss', 'content': 0.07308921217918396, 'timestamp': '2025-09-30 22:35:19.614092', 'step': 18666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.646975', 'step': 18666, 'epoch': 3} {'type': 'loss', 'content': 0.06481746584177017, 'timestamp': '2025-09-30 22:35:19.650818', 'step': 18667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:19.682814', 'step': 18667, 'epoch': 3} {'type': 'loss', 'content': 0.06765631586313248, 'timestamp': '2025-09-30 22:35:19.707511', 'step': 18668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.746431', 'step': 18668, 'epoch': 3} {'type': 'loss', 'content': 0.12259434163570404, 'timestamp': '2025-09-30 22:35:19.751747', 'step': 18669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.783243', 'step': 18669, 'epoch': 3} {'type': 'loss', 'content': 0.06979582458734512, 'timestamp': '2025-09-30 22:35:19.787704', 'step': 18670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.819784', 'step': 18670, 'epoch': 3} {'type': 'loss', 'content': 0.09172552078962326, 'timestamp': '2025-09-30 22:35:19.823338', 'step': 18671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:19.859137', 'step': 18671, 'epoch': 3} {'type': 'loss', 'content': 0.01574382185935974, 'timestamp': '2025-09-30 22:35:19.883552', 'step': 18672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.914158', 'step': 18672, 'epoch': 3} {'type': 'loss', 'content': 0.06656226515769958, 'timestamp': '2025-09-30 22:35:19.917680', 'step': 18673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:19.947869', 'step': 18673, 'epoch': 3} {'type': 'loss', 'content': 0.06345397233963013, 'timestamp': '2025-09-30 22:35:19.955944', 'step': 18674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:19.986692', 'step': 18674, 'epoch': 3} {'type': 'loss', 'content': 0.07437022030353546, 'timestamp': '2025-09-30 22:35:19.990850', 'step': 18675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:20.026679', 'step': 18675, 'epoch': 3} {'type': 'loss', 'content': 0.0811004713177681, 'timestamp': '2025-09-30 22:35:20.055197', 'step': 18676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:20.091556', 'step': 18676, 'epoch': 3} {'type': 'loss', 'content': 0.05830724909901619, 'timestamp': '2025-09-30 22:35:20.094045', 'step': 18677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:35:20.124185', 'step': 18677, 'epoch': 3} {'type': 'loss', 'content': 0.09556306153535843, 'timestamp': '2025-09-30 22:35:20.129135', 'step': 18678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.165012', 'step': 18678, 'epoch': 3} {'type': 'loss', 'content': 0.05637754127383232, 'timestamp': '2025-09-30 22:35:20.173018', 'step': 18679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:20.202966', 'step': 18679, 'epoch': 3} {'type': 'loss', 'content': 0.03529347851872444, 'timestamp': '2025-09-30 22:35:20.233975', 'step': 18680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:20.264438', 'step': 18680, 'epoch': 3} {'type': 'loss', 'content': 0.029717639088630676, 'timestamp': '2025-09-30 22:35:20.276521', 'step': 18681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:20.308354', 'step': 18681, 'epoch': 3} {'type': 'loss', 'content': 0.04772615432739258, 'timestamp': '2025-09-30 22:35:20.319538', 'step': 18682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.356257', 'step': 18682, 'epoch': 3} {'type': 'loss', 'content': 0.05233404412865639, 'timestamp': '2025-09-30 22:35:20.359226', 'step': 18683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:20.390267', 'step': 18683, 'epoch': 3} {'type': 'loss', 'content': 0.07464014738798141, 'timestamp': '2025-09-30 22:35:20.414809', 'step': 18684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:20.445538', 'step': 18684, 'epoch': 3} {'type': 'loss', 'content': 0.03727293014526367, 'timestamp': '2025-09-30 22:35:20.455726', 'step': 18685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:20.486890', 'step': 18685, 'epoch': 3} {'type': 'loss', 'content': 0.04284399002790451, 'timestamp': '2025-09-30 22:35:20.497618', 'step': 18686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.530504', 'step': 18686, 'epoch': 3} {'type': 'loss', 'content': 0.03205444663763046, 'timestamp': '2025-09-30 22:35:20.537341', 'step': 18687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:20.569430', 'step': 18687, 'epoch': 3} {'type': 'loss', 'content': 0.10714907944202423, 'timestamp': '2025-09-30 22:35:20.604963', 'step': 18688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:20.636334', 'step': 18688, 'epoch': 3} {'type': 'loss', 'content': 0.08006123453378677, 'timestamp': '2025-09-30 22:35:20.651132', 'step': 18689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:20.689314', 'step': 18689, 'epoch': 3} {'type': 'loss', 'content': 0.04286225885152817, 'timestamp': '2025-09-30 22:35:20.695693', 'step': 18690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.733806', 'step': 18690, 'epoch': 3} {'type': 'loss', 'content': 0.044342320412397385, 'timestamp': '2025-09-30 22:35:20.739139', 'step': 18691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.773053', 'step': 18691, 'epoch': 3} {'type': 'loss', 'content': 0.0648450031876564, 'timestamp': '2025-09-30 22:35:20.798540', 'step': 18692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.833501', 'step': 18692, 'epoch': 3} {'type': 'loss', 'content': 0.06264661252498627, 'timestamp': '2025-09-30 22:35:20.841109', 'step': 18693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:20.874024', 'step': 18693, 'epoch': 3} {'type': 'loss', 'content': 0.12412039935588837, 'timestamp': '2025-09-30 22:35:20.881698', 'step': 18694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:20.912936', 'step': 18694, 'epoch': 3} {'type': 'loss', 'content': 0.022675197571516037, 'timestamp': '2025-09-30 22:35:20.922952', 'step': 18695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:20.966989', 'step': 18695, 'epoch': 3} {'type': 'loss', 'content': 0.0183414313942194, 'timestamp': '2025-09-30 22:35:20.993407', 'step': 18696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.024613', 'step': 18696, 'epoch': 3} {'type': 'loss', 'content': 0.08296138793230057, 'timestamp': '2025-09-30 22:35:21.031299', 'step': 18697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.064974', 'step': 18697, 'epoch': 3} {'type': 'loss', 'content': 0.05631284788250923, 'timestamp': '2025-09-30 22:35:21.073233', 'step': 18698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.104895', 'step': 18698, 'epoch': 3} {'type': 'loss', 'content': 0.05068913474678993, 'timestamp': '2025-09-30 22:35:21.109000', 'step': 18699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.145114', 'step': 18699, 'epoch': 3} {'type': 'loss', 'content': 0.0574488528072834, 'timestamp': '2025-09-30 22:35:21.176559', 'step': 18700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.213617', 'step': 18700, 'epoch': 3} {'type': 'loss', 'content': 0.05661952123045921, 'timestamp': '2025-09-30 22:35:21.218057', 'step': 18701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:21.249193', 'step': 18701, 'epoch': 3} {'type': 'loss', 'content': 0.048972319811582565, 'timestamp': '2025-09-30 22:35:21.259780', 'step': 18702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.300338', 'step': 18702, 'epoch': 3} {'type': 'loss', 'content': 0.06614143401384354, 'timestamp': '2025-09-30 22:35:21.313094', 'step': 18703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.350319', 'step': 18703, 'epoch': 3} {'type': 'loss', 'content': 0.045517776161432266, 'timestamp': '2025-09-30 22:35:21.377164', 'step': 18704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.415071', 'step': 18704, 'epoch': 3} {'type': 'loss', 'content': 0.09871530532836914, 'timestamp': '2025-09-30 22:35:21.420236', 'step': 18705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.452865', 'step': 18705, 'epoch': 3} {'type': 'loss', 'content': 0.028920425102114677, 'timestamp': '2025-09-30 22:35:21.455630', 'step': 18706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.492143', 'step': 18706, 'epoch': 3} {'type': 'loss', 'content': 0.06314371526241302, 'timestamp': '2025-09-30 22:35:21.497451', 'step': 18707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:35:21.535806', 'step': 18707, 'epoch': 3} {'type': 'loss', 'content': 0.10766188055276871, 'timestamp': '2025-09-30 22:35:21.567765', 'step': 18708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:21.598929', 'step': 18708, 'epoch': 3} {'type': 'loss', 'content': 0.05283190310001373, 'timestamp': '2025-09-30 22:35:21.608603', 'step': 18709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:21.640852', 'step': 18709, 'epoch': 3} {'type': 'loss', 'content': 0.056712884455919266, 'timestamp': '2025-09-30 22:35:21.645857', 'step': 18710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:21.678799', 'step': 18710, 'epoch': 3} {'type': 'loss', 'content': 0.07909205555915833, 'timestamp': '2025-09-30 22:35:21.684235', 'step': 18711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:21.717536', 'step': 18711, 'epoch': 3} {'type': 'loss', 'content': 0.04817643761634827, 'timestamp': '2025-09-30 22:35:21.744425', 'step': 18712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.777037', 'step': 18712, 'epoch': 3} {'type': 'loss', 'content': 0.06899193674325943, 'timestamp': '2025-09-30 22:35:21.786533', 'step': 18713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:21.817179', 'step': 18713, 'epoch': 3} {'type': 'loss', 'content': 0.08730066567659378, 'timestamp': '2025-09-30 22:35:21.827559', 'step': 18714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.867109', 'step': 18714, 'epoch': 3} {'type': 'loss', 'content': 0.10445787012577057, 'timestamp': '2025-09-30 22:35:21.874923', 'step': 18715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:21.907471', 'step': 18715, 'epoch': 3} {'type': 'loss', 'content': 0.03438348323106766, 'timestamp': '2025-09-30 22:35:21.932431', 'step': 18716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:21.964464', 'step': 18716, 'epoch': 3} {'type': 'loss', 'content': 0.030429603531956673, 'timestamp': '2025-09-30 22:35:21.967751', 'step': 18717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:22.000281', 'step': 18717, 'epoch': 3} {'type': 'loss', 'content': 0.03234802931547165, 'timestamp': '2025-09-30 22:35:22.004854', 'step': 18718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.044156', 'step': 18718, 'epoch': 3} {'type': 'loss', 'content': 0.10337503254413605, 'timestamp': '2025-09-30 22:35:22.049404', 'step': 18719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.079809', 'step': 18719, 'epoch': 3} {'type': 'loss', 'content': 0.03596355393528938, 'timestamp': '2025-09-30 22:35:22.106931', 'step': 18720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.149169', 'step': 18720, 'epoch': 3} {'type': 'loss', 'content': 0.0413636788725853, 'timestamp': '2025-09-30 22:35:22.159954', 'step': 18721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.200908', 'step': 18721, 'epoch': 3} {'type': 'loss', 'content': 0.06465300172567368, 'timestamp': '2025-09-30 22:35:22.204554', 'step': 18722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.241306', 'step': 18722, 'epoch': 3} {'type': 'loss', 'content': 0.020901592448353767, 'timestamp': '2025-09-30 22:35:22.252095', 'step': 18723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.292436', 'step': 18723, 'epoch': 3} {'type': 'loss', 'content': 0.1578289419412613, 'timestamp': '2025-09-30 22:35:22.319137', 'step': 18724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.352063', 'step': 18724, 'epoch': 3} {'type': 'loss', 'content': 0.013933584094047546, 'timestamp': '2025-09-30 22:35:22.355530', 'step': 18725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.388433', 'step': 18725, 'epoch': 3} {'type': 'loss', 'content': 0.024459032341837883, 'timestamp': '2025-09-30 22:35:22.392528', 'step': 18726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.422861', 'step': 18726, 'epoch': 3} {'type': 'loss', 'content': 0.04830509424209595, 'timestamp': '2025-09-30 22:35:22.430121', 'step': 18727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:22.460712', 'step': 18727, 'epoch': 3} {'type': 'loss', 'content': 0.016269149258732796, 'timestamp': '2025-09-30 22:35:22.485866', 'step': 18728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.523077', 'step': 18728, 'epoch': 3} {'type': 'loss', 'content': 0.09114459156990051, 'timestamp': '2025-09-30 22:35:22.529911', 'step': 18729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:22.563293', 'step': 18729, 'epoch': 3} {'type': 'loss', 'content': 0.04596784710884094, 'timestamp': '2025-09-30 22:35:22.570004', 'step': 18730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:22.604882', 'step': 18730, 'epoch': 3} {'type': 'loss', 'content': 0.0666024386882782, 'timestamp': '2025-09-30 22:35:22.608633', 'step': 18731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.639313', 'step': 18731, 'epoch': 3} {'type': 'loss', 'content': 0.057132452726364136, 'timestamp': '2025-09-30 22:35:22.664641', 'step': 18732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.700841', 'step': 18732, 'epoch': 3} {'type': 'loss', 'content': 0.062204089015722275, 'timestamp': '2025-09-30 22:35:22.707111', 'step': 18733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.746701', 'step': 18733, 'epoch': 3} {'type': 'loss', 'content': 0.016536014154553413, 'timestamp': '2025-09-30 22:35:22.754487', 'step': 18734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:22.786176', 'step': 18734, 'epoch': 3} {'type': 'loss', 'content': 0.04303274676203728, 'timestamp': '2025-09-30 22:35:22.795690', 'step': 18735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:22.836431', 'step': 18735, 'epoch': 3} {'type': 'loss', 'content': 0.05400014668703079, 'timestamp': '2025-09-30 22:35:22.865990', 'step': 18736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:22.897094', 'step': 18736, 'epoch': 3} {'type': 'loss', 'content': 0.07647334039211273, 'timestamp': '2025-09-30 22:35:22.905229', 'step': 18737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:22.945353', 'step': 18737, 'epoch': 3} {'type': 'loss', 'content': 0.019845370203256607, 'timestamp': '2025-09-30 22:35:22.952338', 'step': 18738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:22.987052', 'step': 18738, 'epoch': 3} {'type': 'loss', 'content': 0.10824399441480637, 'timestamp': '2025-09-30 22:35:22.996635', 'step': 18739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.031114', 'step': 18739, 'epoch': 3} {'type': 'loss', 'content': 0.06554064899682999, 'timestamp': '2025-09-30 22:35:23.056218', 'step': 18740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.100634', 'step': 18740, 'epoch': 3} {'type': 'loss', 'content': 0.04312540590763092, 'timestamp': '2025-09-30 22:35:23.105042', 'step': 18741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.136075', 'step': 18741, 'epoch': 3} {'type': 'loss', 'content': 0.07832525670528412, 'timestamp': '2025-09-30 22:35:23.143648', 'step': 18742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.182535', 'step': 18742, 'epoch': 3} {'type': 'loss', 'content': 0.019033579155802727, 'timestamp': '2025-09-30 22:35:23.186292', 'step': 18743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.218257', 'step': 18743, 'epoch': 3} {'type': 'loss', 'content': 0.08004021644592285, 'timestamp': '2025-09-30 22:35:23.245796', 'step': 18744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.295555', 'step': 18744, 'epoch': 3} {'type': 'loss', 'content': 0.0273837149143219, 'timestamp': '2025-09-30 22:35:23.301805', 'step': 18745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:23.333646', 'step': 18745, 'epoch': 3} {'type': 'loss', 'content': 0.12662570178508759, 'timestamp': '2025-09-30 22:35:23.336912', 'step': 18746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.369779', 'step': 18746, 'epoch': 3} {'type': 'loss', 'content': 0.016466932371258736, 'timestamp': '2025-09-30 22:35:23.378244', 'step': 18747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.412924', 'step': 18747, 'epoch': 3} {'type': 'loss', 'content': 0.11388717591762543, 'timestamp': '2025-09-30 22:35:23.441161', 'step': 18748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:23.476311', 'step': 18748, 'epoch': 3} {'type': 'loss', 'content': 0.07560772448778152, 'timestamp': '2025-09-30 22:35:23.479342', 'step': 18749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.521697', 'step': 18749, 'epoch': 3} {'type': 'loss', 'content': 0.04428736865520477, 'timestamp': '2025-09-30 22:35:23.525552', 'step': 18750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.556912', 'step': 18750, 'epoch': 3} {'type': 'loss', 'content': 0.025495227426290512, 'timestamp': '2025-09-30 22:35:23.563024', 'step': 18751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:23.597243', 'step': 18751, 'epoch': 3} {'type': 'loss', 'content': 0.09548532217741013, 'timestamp': '2025-09-30 22:35:23.623537', 'step': 18752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.659455', 'step': 18752, 'epoch': 3} {'type': 'loss', 'content': 0.05937248840928078, 'timestamp': '2025-09-30 22:35:23.663140', 'step': 18753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.695223', 'step': 18753, 'epoch': 3} {'type': 'loss', 'content': 0.07654359191656113, 'timestamp': '2025-09-30 22:35:23.700722', 'step': 18754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.739808', 'step': 18754, 'epoch': 3} {'type': 'loss', 'content': 0.05593137443065643, 'timestamp': '2025-09-30 22:35:23.748918', 'step': 18755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.793251', 'step': 18755, 'epoch': 3} {'type': 'loss', 'content': 0.08973604440689087, 'timestamp': '2025-09-30 22:35:23.824804', 'step': 18756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.856482', 'step': 18756, 'epoch': 3} {'type': 'loss', 'content': 0.05975719913840294, 'timestamp': '2025-09-30 22:35:23.860804', 'step': 18757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:23.893205', 'step': 18757, 'epoch': 3} {'type': 'loss', 'content': 0.05196525156497955, 'timestamp': '2025-09-30 22:35:23.909643', 'step': 18758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.941302', 'step': 18758, 'epoch': 3} {'type': 'loss', 'content': 0.03908794745802879, 'timestamp': '2025-09-30 22:35:23.951538', 'step': 18759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:23.983641', 'step': 18759, 'epoch': 3} {'type': 'loss', 'content': 0.05395203083753586, 'timestamp': '2025-09-30 22:35:24.016316', 'step': 18760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:24.054597', 'step': 18760, 'epoch': 3} {'type': 'loss', 'content': 0.06659390777349472, 'timestamp': '2025-09-30 22:35:24.061617', 'step': 18761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.100582', 'step': 18761, 'epoch': 3} {'type': 'loss', 'content': 0.06257426738739014, 'timestamp': '2025-09-30 22:35:24.105192', 'step': 18762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:24.144724', 'step': 18762, 'epoch': 3} {'type': 'loss', 'content': 0.010645514354109764, 'timestamp': '2025-09-30 22:35:24.149567', 'step': 18763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:24.195714', 'step': 18763, 'epoch': 3} {'type': 'loss', 'content': 0.15967915952205658, 'timestamp': '2025-09-30 22:35:24.221065', 'step': 18764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.257485', 'step': 18764, 'epoch': 3} {'type': 'loss', 'content': 0.09419926255941391, 'timestamp': '2025-09-30 22:35:24.263644', 'step': 18765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:24.303768', 'step': 18765, 'epoch': 3} {'type': 'loss', 'content': 0.03058568388223648, 'timestamp': '2025-09-30 22:35:24.318646', 'step': 18766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:24.354553', 'step': 18766, 'epoch': 3} {'type': 'loss', 'content': 0.11283937096595764, 'timestamp': '2025-09-30 22:35:24.357061', 'step': 18767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.387954', 'step': 18767, 'epoch': 3} {'type': 'loss', 'content': 0.06669843196868896, 'timestamp': '2025-09-30 22:35:24.412247', 'step': 18768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.449538', 'step': 18768, 'epoch': 3} {'type': 'loss', 'content': 0.09191884845495224, 'timestamp': '2025-09-30 22:35:24.453801', 'step': 18769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.500800', 'step': 18769, 'epoch': 3} {'type': 'loss', 'content': 0.061557069420814514, 'timestamp': '2025-09-30 22:35:24.514391', 'step': 18770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:24.548514', 'step': 18770, 'epoch': 3} {'type': 'loss', 'content': 0.0682944729924202, 'timestamp': '2025-09-30 22:35:24.553229', 'step': 18771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.584229', 'step': 18771, 'epoch': 3} {'type': 'loss', 'content': 0.0481099970638752, 'timestamp': '2025-09-30 22:35:24.610022', 'step': 18772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:24.643047', 'step': 18772, 'epoch': 3} {'type': 'loss', 'content': 0.09285648912191391, 'timestamp': '2025-09-30 22:35:24.647794', 'step': 18773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:24.687162', 'step': 18773, 'epoch': 3} {'type': 'loss', 'content': 0.06761722266674042, 'timestamp': '2025-09-30 22:35:24.699807', 'step': 18774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:24.743638', 'step': 18774, 'epoch': 3} {'type': 'loss', 'content': 0.09916005283594131, 'timestamp': '2025-09-30 22:35:24.747115', 'step': 18775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.779085', 'step': 18775, 'epoch': 3} {'type': 'loss', 'content': 0.08942452818155289, 'timestamp': '2025-09-30 22:35:24.804015', 'step': 18776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.836418', 'step': 18776, 'epoch': 3} {'type': 'loss', 'content': 0.09212858229875565, 'timestamp': '2025-09-30 22:35:24.840114', 'step': 18777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:24.874519', 'step': 18777, 'epoch': 3} {'type': 'loss', 'content': 0.09069608151912689, 'timestamp': '2025-09-30 22:35:24.877650', 'step': 18778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:24.912753', 'step': 18778, 'epoch': 3} {'type': 'loss', 'content': 0.0310569666326046, 'timestamp': '2025-09-30 22:35:24.917245', 'step': 18779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:24.948815', 'step': 18779, 'epoch': 3} {'type': 'loss', 'content': 0.034148890525102615, 'timestamp': '2025-09-30 22:35:24.973880', 'step': 18780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.004536', 'step': 18780, 'epoch': 3} {'type': 'loss', 'content': 0.10135926306247711, 'timestamp': '2025-09-30 22:35:25.015612', 'step': 18781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.049082', 'step': 18781, 'epoch': 3} {'type': 'loss', 'content': 0.06925362348556519, 'timestamp': '2025-09-30 22:35:25.055136', 'step': 18782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:25.087266', 'step': 18782, 'epoch': 3} {'type': 'loss', 'content': 0.08915506303310394, 'timestamp': '2025-09-30 22:35:25.092239', 'step': 18783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:25.128495', 'step': 18783, 'epoch': 3} {'type': 'loss', 'content': 0.03735100105404854, 'timestamp': '2025-09-30 22:35:25.153257', 'step': 18784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:25.184376', 'step': 18784, 'epoch': 3} {'type': 'loss', 'content': 0.14059971272945404, 'timestamp': '2025-09-30 22:35:25.187904', 'step': 18785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:25.225353', 'step': 18785, 'epoch': 3} {'type': 'loss', 'content': 0.13047051429748535, 'timestamp': '2025-09-30 22:35:25.229783', 'step': 18786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.261732', 'step': 18786, 'epoch': 3} {'type': 'loss', 'content': 0.020815543830394745, 'timestamp': '2025-09-30 22:35:25.267217', 'step': 18787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:25.299680', 'step': 18787, 'epoch': 3} {'type': 'loss', 'content': 0.03276346996426582, 'timestamp': '2025-09-30 22:35:25.324814', 'step': 18788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.357409', 'step': 18788, 'epoch': 3} {'type': 'loss', 'content': 0.0589727908372879, 'timestamp': '2025-09-30 22:35:25.364037', 'step': 18789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:25.408809', 'step': 18789, 'epoch': 3} {'type': 'loss', 'content': 0.058042749762535095, 'timestamp': '2025-09-30 22:35:25.412593', 'step': 18790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.448535', 'step': 18790, 'epoch': 3} {'type': 'loss', 'content': 0.020273851230740547, 'timestamp': '2025-09-30 22:35:25.451737', 'step': 18791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:25.490858', 'step': 18791, 'epoch': 3} {'type': 'loss', 'content': 0.054622773081064224, 'timestamp': '2025-09-30 22:35:25.516234', 'step': 18792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:25.553612', 'step': 18792, 'epoch': 3} {'type': 'loss', 'content': 0.1256212592124939, 'timestamp': '2025-09-30 22:35:25.561572', 'step': 18793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:25.595632', 'step': 18793, 'epoch': 3} {'type': 'loss', 'content': 0.07841120660305023, 'timestamp': '2025-09-30 22:35:25.599848', 'step': 18794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:25.639190', 'step': 18794, 'epoch': 3} {'type': 'loss', 'content': 0.03864211216568947, 'timestamp': '2025-09-30 22:35:25.645055', 'step': 18795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:25.676339', 'step': 18795, 'epoch': 3} {'type': 'loss', 'content': 0.0032955645583570004, 'timestamp': '2025-09-30 22:35:25.700523', 'step': 18796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:25.732583', 'step': 18796, 'epoch': 3} {'type': 'loss', 'content': 0.022406041622161865, 'timestamp': '2025-09-30 22:35:25.735455', 'step': 18797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.784370', 'step': 18797, 'epoch': 3} {'type': 'loss', 'content': 0.06002804636955261, 'timestamp': '2025-09-30 22:35:25.788902', 'step': 18798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:25.819949', 'step': 18798, 'epoch': 3} {'type': 'loss', 'content': 0.09304355084896088, 'timestamp': '2025-09-30 22:35:25.827522', 'step': 18799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:25.859526', 'step': 18799, 'epoch': 3} {'type': 'loss', 'content': 0.03639744967222214, 'timestamp': '2025-09-30 22:35:25.883826', 'step': 18800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:25.921227', 'step': 18800, 'epoch': 3} {'type': 'loss', 'content': 0.08559936285018921, 'timestamp': '2025-09-30 22:35:25.926159', 'step': 18801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:25.961272', 'step': 18801, 'epoch': 3} {'type': 'loss', 'content': 0.05314808711409569, 'timestamp': '2025-09-30 22:35:25.965794', 'step': 18802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:25.997241', 'step': 18802, 'epoch': 3} {'type': 'loss', 'content': 0.040454961359500885, 'timestamp': '2025-09-30 22:35:26.000872', 'step': 18803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:26.036212', 'step': 18803, 'epoch': 3} {'type': 'loss', 'content': 0.12048855423927307, 'timestamp': '2025-09-30 22:35:26.062753', 'step': 18804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.093745', 'step': 18804, 'epoch': 3} {'type': 'loss', 'content': 0.09870965778827667, 'timestamp': '2025-09-30 22:35:26.097024', 'step': 18805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.126480', 'step': 18805, 'epoch': 3} {'type': 'loss', 'content': 0.04834127426147461, 'timestamp': '2025-09-30 22:35:26.132044', 'step': 18806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:26.165326', 'step': 18806, 'epoch': 3} {'type': 'loss', 'content': 0.06547274440526962, 'timestamp': '2025-09-30 22:35:26.170340', 'step': 18807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.203642', 'step': 18807, 'epoch': 3} {'type': 'loss', 'content': 0.0736926794052124, 'timestamp': '2025-09-30 22:35:26.228740', 'step': 18808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:26.260407', 'step': 18808, 'epoch': 3} {'type': 'loss', 'content': 0.11956251412630081, 'timestamp': '2025-09-30 22:35:26.264289', 'step': 18809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:26.296990', 'step': 18809, 'epoch': 3} {'type': 'loss', 'content': 0.08081299811601639, 'timestamp': '2025-09-30 22:35:26.301700', 'step': 18810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.334592', 'step': 18810, 'epoch': 3} {'type': 'loss', 'content': 0.07574708014726639, 'timestamp': '2025-09-30 22:35:26.338952', 'step': 18811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:26.370756', 'step': 18811, 'epoch': 3} {'type': 'loss', 'content': 0.07275084406137466, 'timestamp': '2025-09-30 22:35:26.395665', 'step': 18812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.427192', 'step': 18812, 'epoch': 3} {'type': 'loss', 'content': 0.017661908641457558, 'timestamp': '2025-09-30 22:35:26.433139', 'step': 18813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:26.463615', 'step': 18813, 'epoch': 3} {'type': 'loss', 'content': 0.06253361701965332, 'timestamp': '2025-09-30 22:35:26.467727', 'step': 18814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:26.499418', 'step': 18814, 'epoch': 3} {'type': 'loss', 'content': 0.11319391429424286, 'timestamp': '2025-09-30 22:35:26.504425', 'step': 18815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:26.537980', 'step': 18815, 'epoch': 3} {'type': 'loss', 'content': 0.053405825048685074, 'timestamp': '2025-09-30 22:35:26.564146', 'step': 18816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:26.604167', 'step': 18816, 'epoch': 3} {'type': 'loss', 'content': 0.0807001069188118, 'timestamp': '2025-09-30 22:35:26.608679', 'step': 18817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.640576', 'step': 18817, 'epoch': 3} {'type': 'loss', 'content': 0.09345125406980515, 'timestamp': '2025-09-30 22:35:26.646328', 'step': 18818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:26.679320', 'step': 18818, 'epoch': 3} {'type': 'loss', 'content': 0.08423921465873718, 'timestamp': '2025-09-30 22:35:26.687122', 'step': 18819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:26.723351', 'step': 18819, 'epoch': 3} {'type': 'loss', 'content': 0.1400376707315445, 'timestamp': '2025-09-30 22:35:26.750875', 'step': 18820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.781834', 'step': 18820, 'epoch': 3} {'type': 'loss', 'content': 0.0987926498055458, 'timestamp': '2025-09-30 22:35:26.785665', 'step': 18821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.831475', 'step': 18821, 'epoch': 3} {'type': 'loss', 'content': 0.027038441970944405, 'timestamp': '2025-09-30 22:35:26.839808', 'step': 18822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.870986', 'step': 18822, 'epoch': 3} {'type': 'loss', 'content': 0.05447319149971008, 'timestamp': '2025-09-30 22:35:26.877578', 'step': 18823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:26.914965', 'step': 18823, 'epoch': 3} {'type': 'loss', 'content': 0.04690718650817871, 'timestamp': '2025-09-30 22:35:26.944238', 'step': 18824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:26.976298', 'step': 18824, 'epoch': 3} {'type': 'loss', 'content': 0.09996960312128067, 'timestamp': '2025-09-30 22:35:26.979985', 'step': 18825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:27.016475', 'step': 18825, 'epoch': 3} {'type': 'loss', 'content': 0.0576244555413723, 'timestamp': '2025-09-30 22:35:27.021062', 'step': 18826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:27.056540', 'step': 18826, 'epoch': 3} {'type': 'loss', 'content': 0.06437770277261734, 'timestamp': '2025-09-30 22:35:27.059355', 'step': 18827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:27.090577', 'step': 18827, 'epoch': 3} {'type': 'loss', 'content': 0.03320414572954178, 'timestamp': '2025-09-30 22:35:27.116013', 'step': 18828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:27.148950', 'step': 18828, 'epoch': 3} {'type': 'loss', 'content': 0.11410956084728241, 'timestamp': '2025-09-30 22:35:27.151856', 'step': 18829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.182268', 'step': 18829, 'epoch': 3} {'type': 'loss', 'content': 0.10481712967157364, 'timestamp': '2025-09-30 22:35:27.191264', 'step': 18830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.224060', 'step': 18830, 'epoch': 3} {'type': 'loss', 'content': 0.042933132499456406, 'timestamp': '2025-09-30 22:35:27.227206', 'step': 18831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:27.263782', 'step': 18831, 'epoch': 3} {'type': 'loss', 'content': 0.06766378879547119, 'timestamp': '2025-09-30 22:35:27.289332', 'step': 18832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.320332', 'step': 18832, 'epoch': 3} {'type': 'loss', 'content': 0.0646439716219902, 'timestamp': '2025-09-30 22:35:27.325061', 'step': 18833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:27.356314', 'step': 18833, 'epoch': 3} {'type': 'loss', 'content': 0.04393159598112106, 'timestamp': '2025-09-30 22:35:27.365094', 'step': 18834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.400982', 'step': 18834, 'epoch': 3} {'type': 'loss', 'content': 0.055602945387363434, 'timestamp': '2025-09-30 22:35:27.403461', 'step': 18835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:27.438168', 'step': 18835, 'epoch': 3} {'type': 'loss', 'content': 0.03594336286187172, 'timestamp': '2025-09-30 22:35:27.468893', 'step': 18836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:27.501331', 'step': 18836, 'epoch': 3} {'type': 'loss', 'content': 0.040254145860672, 'timestamp': '2025-09-30 22:35:27.505618', 'step': 18837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:27.537863', 'step': 18837, 'epoch': 3} {'type': 'loss', 'content': 0.09521787613630295, 'timestamp': '2025-09-30 22:35:27.546375', 'step': 18838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.580743', 'step': 18838, 'epoch': 3} {'type': 'loss', 'content': 0.11774447560310364, 'timestamp': '2025-09-30 22:35:27.585790', 'step': 18839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:27.617422', 'step': 18839, 'epoch': 3} {'type': 'loss', 'content': 0.08229481428861618, 'timestamp': '2025-09-30 22:35:27.648117', 'step': 18840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:27.680557', 'step': 18840, 'epoch': 3} {'type': 'loss', 'content': 0.05398740619421005, 'timestamp': '2025-09-30 22:35:27.685098', 'step': 18841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.716012', 'step': 18841, 'epoch': 3} {'type': 'loss', 'content': 0.07246077060699463, 'timestamp': '2025-09-30 22:35:27.719659', 'step': 18842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:27.752283', 'step': 18842, 'epoch': 3} {'type': 'loss', 'content': 0.06709866970777512, 'timestamp': '2025-09-30 22:35:27.757644', 'step': 18843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:27.789499', 'step': 18843, 'epoch': 3} {'type': 'loss', 'content': 0.06454187631607056, 'timestamp': '2025-09-30 22:35:27.819840', 'step': 18844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.852685', 'step': 18844, 'epoch': 3} {'type': 'loss', 'content': 0.03855663165450096, 'timestamp': '2025-09-30 22:35:27.864799', 'step': 18845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.902542', 'step': 18845, 'epoch': 3} {'type': 'loss', 'content': 0.08518103510141373, 'timestamp': '2025-09-30 22:35:27.906459', 'step': 18846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:27.938095', 'step': 18846, 'epoch': 3} {'type': 'loss', 'content': 0.0411570742726326, 'timestamp': '2025-09-30 22:35:27.945696', 'step': 18847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:27.983337', 'step': 18847, 'epoch': 3} {'type': 'loss', 'content': 0.027476884424686432, 'timestamp': '2025-09-30 22:35:28.015115', 'step': 18848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:28.047342', 'step': 18848, 'epoch': 3} {'type': 'loss', 'content': 0.06699647754430771, 'timestamp': '2025-09-30 22:35:28.054837', 'step': 18849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:28.086394', 'step': 18849, 'epoch': 3} {'type': 'loss', 'content': 0.0497632771730423, 'timestamp': '2025-09-30 22:35:28.098412', 'step': 18850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:28.135922', 'step': 18850, 'epoch': 3} {'type': 'loss', 'content': 0.09133478999137878, 'timestamp': '2025-09-30 22:35:28.139298', 'step': 18851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:28.170285', 'step': 18851, 'epoch': 3} {'type': 'loss', 'content': 0.07988931983709335, 'timestamp': '2025-09-30 22:35:28.196417', 'step': 18852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:28.229670', 'step': 18852, 'epoch': 3} {'type': 'loss', 'content': 0.06422922760248184, 'timestamp': '2025-09-30 22:35:28.233707', 'step': 18853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:28.274988', 'step': 18853, 'epoch': 3} {'type': 'loss', 'content': 0.07723673433065414, 'timestamp': '2025-09-30 22:35:28.283228', 'step': 18854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:28.322571', 'step': 18854, 'epoch': 3} {'type': 'loss', 'content': 0.07648367434740067, 'timestamp': '2025-09-30 22:35:28.326036', 'step': 18855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:28.358543', 'step': 18855, 'epoch': 3} {'type': 'loss', 'content': 0.035710908472537994, 'timestamp': '2025-09-30 22:35:28.394939', 'step': 18856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:28.435293', 'step': 18856, 'epoch': 3} {'type': 'loss', 'content': 0.10970354080200195, 'timestamp': '2025-09-30 22:35:28.439998', 'step': 18857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:28.472738', 'step': 18857, 'epoch': 3} {'type': 'loss', 'content': 0.09812518209218979, 'timestamp': '2025-09-30 22:35:28.476410', 'step': 18858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:28.508706', 'step': 18858, 'epoch': 3} {'type': 'loss', 'content': 0.046934034675359726, 'timestamp': '2025-09-30 22:35:28.515227', 'step': 18859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:28.548539', 'step': 18859, 'epoch': 3} {'type': 'loss', 'content': 0.07701738178730011, 'timestamp': '2025-09-30 22:35:28.574822', 'step': 18860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:28.606677', 'step': 18860, 'epoch': 3} {'type': 'loss', 'content': 0.062157001346349716, 'timestamp': '2025-09-30 22:35:28.612737', 'step': 18861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:28.644593', 'step': 18861, 'epoch': 3} {'type': 'loss', 'content': 0.10006948560476303, 'timestamp': '2025-09-30 22:35:28.651420', 'step': 18862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:28.692511', 'step': 18862, 'epoch': 3} {'type': 'loss', 'content': 0.08559706807136536, 'timestamp': '2025-09-30 22:35:28.699287', 'step': 18863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:28.733960', 'step': 18863, 'epoch': 3} {'type': 'loss', 'content': 0.03610914945602417, 'timestamp': '2025-09-30 22:35:28.759299', 'step': 18864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:28.799910', 'step': 18864, 'epoch': 3} {'type': 'loss', 'content': 0.08501676470041275, 'timestamp': '2025-09-30 22:35:28.803592', 'step': 18865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:28.834468', 'step': 18865, 'epoch': 3} {'type': 'loss', 'content': 0.0823124572634697, 'timestamp': '2025-09-30 22:35:28.838368', 'step': 18866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:28.872773', 'step': 18866, 'epoch': 3} {'type': 'loss', 'content': 0.13045023381710052, 'timestamp': '2025-09-30 22:35:28.877653', 'step': 18867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:28.916320', 'step': 18867, 'epoch': 3} {'type': 'loss', 'content': 0.0875651016831398, 'timestamp': '2025-09-30 22:35:28.940837', 'step': 18868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:28.978277', 'step': 18868, 'epoch': 3} {'type': 'loss', 'content': 0.022627651691436768, 'timestamp': '2025-09-30 22:35:28.984374', 'step': 18869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.021937', 'step': 18869, 'epoch': 3} {'type': 'loss', 'content': 0.0529821552336216, 'timestamp': '2025-09-30 22:35:29.024535', 'step': 18870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.056277', 'step': 18870, 'epoch': 3} {'type': 'loss', 'content': 0.13327927887439728, 'timestamp': '2025-09-30 22:35:29.060350', 'step': 18871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:29.092093', 'step': 18871, 'epoch': 3} {'type': 'loss', 'content': 0.028833452612161636, 'timestamp': '2025-09-30 22:35:29.117504', 'step': 18872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.156230', 'step': 18872, 'epoch': 3} {'type': 'loss', 'content': 0.11902959644794464, 'timestamp': '2025-09-30 22:35:29.161262', 'step': 18873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:29.199768', 'step': 18873, 'epoch': 3} {'type': 'loss', 'content': 0.006273858714848757, 'timestamp': '2025-09-30 22:35:29.205037', 'step': 18874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:29.239434', 'step': 18874, 'epoch': 3} {'type': 'loss', 'content': 0.05377441644668579, 'timestamp': '2025-09-30 22:35:29.242937', 'step': 18875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.279088', 'step': 18875, 'epoch': 3} {'type': 'loss', 'content': 0.041306428611278534, 'timestamp': '2025-09-30 22:35:29.309258', 'step': 18876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:29.346678', 'step': 18876, 'epoch': 3} {'type': 'loss', 'content': 0.07590920478105545, 'timestamp': '2025-09-30 22:35:29.354776', 'step': 18877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:29.391742', 'step': 18877, 'epoch': 3} {'type': 'loss', 'content': 0.09590594470500946, 'timestamp': '2025-09-30 22:35:29.395120', 'step': 18878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.427489', 'step': 18878, 'epoch': 3} {'type': 'loss', 'content': 0.06868766993284225, 'timestamp': '2025-09-30 22:35:29.431775', 'step': 18879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:29.464040', 'step': 18879, 'epoch': 3} {'type': 'loss', 'content': 0.03294331952929497, 'timestamp': '2025-09-30 22:35:29.489197', 'step': 18880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:29.520161', 'step': 18880, 'epoch': 3} {'type': 'loss', 'content': 0.08065298944711685, 'timestamp': '2025-09-30 22:35:29.526458', 'step': 18881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.557186', 'step': 18881, 'epoch': 3} {'type': 'loss', 'content': 0.03675270453095436, 'timestamp': '2025-09-30 22:35:29.560621', 'step': 18882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:29.593649', 'step': 18882, 'epoch': 3} {'type': 'loss', 'content': 0.06488348543643951, 'timestamp': '2025-09-30 22:35:29.597755', 'step': 18883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:29.631778', 'step': 18883, 'epoch': 3} {'type': 'loss', 'content': 0.08060956001281738, 'timestamp': '2025-09-30 22:35:29.656290', 'step': 18884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:29.689651', 'step': 18884, 'epoch': 3} {'type': 'loss', 'content': 0.028337378054857254, 'timestamp': '2025-09-30 22:35:29.694023', 'step': 18885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:29.725498', 'step': 18885, 'epoch': 3} {'type': 'loss', 'content': 0.16240328550338745, 'timestamp': '2025-09-30 22:35:29.729222', 'step': 18886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.759316', 'step': 18886, 'epoch': 3} {'type': 'loss', 'content': 0.04078742489218712, 'timestamp': '2025-09-30 22:35:29.763134', 'step': 18887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:29.796333', 'step': 18887, 'epoch': 3} {'type': 'loss', 'content': 0.03222046419978142, 'timestamp': '2025-09-30 22:35:29.821417', 'step': 18888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:29.854920', 'step': 18888, 'epoch': 3} {'type': 'loss', 'content': 0.14294393360614777, 'timestamp': '2025-09-30 22:35:29.860109', 'step': 18889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:29.892704', 'step': 18889, 'epoch': 3} {'type': 'loss', 'content': 0.1458401083946228, 'timestamp': '2025-09-30 22:35:29.896812', 'step': 18890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:29.928888', 'step': 18890, 'epoch': 3} {'type': 'loss', 'content': 0.08640484511852264, 'timestamp': '2025-09-30 22:35:29.937558', 'step': 18891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:29.972721', 'step': 18891, 'epoch': 3} {'type': 'loss', 'content': 0.08511842787265778, 'timestamp': '2025-09-30 22:35:29.999149', 'step': 18892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:30.030736', 'step': 18892, 'epoch': 3} {'type': 'loss', 'content': 0.1021730974316597, 'timestamp': '2025-09-30 22:35:30.033766', 'step': 18893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:30.077326', 'step': 18893, 'epoch': 3} {'type': 'loss', 'content': 0.07067783176898956, 'timestamp': '2025-09-30 22:35:30.081590', 'step': 18894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:30.113160', 'step': 18894, 'epoch': 3} {'type': 'loss', 'content': 0.05585689842700958, 'timestamp': '2025-09-30 22:35:30.117824', 'step': 18895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:30.150101', 'step': 18895, 'epoch': 3} {'type': 'loss', 'content': 0.06405606120824814, 'timestamp': '2025-09-30 22:35:30.176023', 'step': 18896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:30.214899', 'step': 18896, 'epoch': 3} {'type': 'loss', 'content': 0.10840599238872528, 'timestamp': '2025-09-30 22:35:30.218786', 'step': 18897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:30.252045', 'step': 18897, 'epoch': 3} {'type': 'loss', 'content': 0.04879728704690933, 'timestamp': '2025-09-30 22:35:30.261691', 'step': 18898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:30.299258', 'step': 18898, 'epoch': 3} {'type': 'loss', 'content': 0.09774389863014221, 'timestamp': '2025-09-30 22:35:30.304000', 'step': 18899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:30.337644', 'step': 18899, 'epoch': 3} {'type': 'loss', 'content': 0.018100213259458542, 'timestamp': '2025-09-30 22:35:30.363022', 'step': 18900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:30.394769', 'step': 18900, 'epoch': 3} {'type': 'loss', 'content': 0.058695461601018906, 'timestamp': '2025-09-30 22:35:30.404438', 'step': 18901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:35:30.436953', 'step': 18901, 'epoch': 3} {'type': 'loss', 'content': 0.08664529025554657, 'timestamp': '2025-09-30 22:35:30.444705', 'step': 18902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:30.476662', 'step': 18902, 'epoch': 3} {'type': 'loss', 'content': 0.05107705295085907, 'timestamp': '2025-09-30 22:35:30.482045', 'step': 18903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:30.517290', 'step': 18903, 'epoch': 3} {'type': 'loss', 'content': 0.032376546412706375, 'timestamp': '2025-09-30 22:35:30.546890', 'step': 18904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:30.578817', 'step': 18904, 'epoch': 3} {'type': 'loss', 'content': 0.05719123035669327, 'timestamp': '2025-09-30 22:35:30.583481', 'step': 18905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:30.614565', 'step': 18905, 'epoch': 3} {'type': 'loss', 'content': 0.02798880822956562, 'timestamp': '2025-09-30 22:35:30.626694', 'step': 18906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:30.659503', 'step': 18906, 'epoch': 3} {'type': 'loss', 'content': 0.057774703949689865, 'timestamp': '2025-09-30 22:35:30.668123', 'step': 18907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:30.705527', 'step': 18907, 'epoch': 3} {'type': 'loss', 'content': 0.046139396727085114, 'timestamp': '2025-09-30 22:35:30.730464', 'step': 18908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:30.769346', 'step': 18908, 'epoch': 3} {'type': 'loss', 'content': 0.0293747428804636, 'timestamp': '2025-09-30 22:35:30.776304', 'step': 18909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:35:30.807940', 'step': 18909, 'epoch': 3} {'type': 'loss', 'content': 0.046639446169137955, 'timestamp': '2025-09-30 22:35:30.812262', 'step': 18910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:30.849900', 'step': 18910, 'epoch': 3} {'type': 'loss', 'content': 0.04377265274524689, 'timestamp': '2025-09-30 22:35:30.854878', 'step': 18911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:30.888266', 'step': 18911, 'epoch': 3} {'type': 'loss', 'content': 0.03782167658209801, 'timestamp': '2025-09-30 22:35:30.912893', 'step': 18912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:30.944538', 'step': 18912, 'epoch': 3} {'type': 'loss', 'content': 0.04039386659860611, 'timestamp': '2025-09-30 22:35:30.954275', 'step': 18913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:30.985139', 'step': 18913, 'epoch': 3} {'type': 'loss', 'content': 0.06808449327945709, 'timestamp': '2025-09-30 22:35:30.990582', 'step': 18914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:31.032831', 'step': 18914, 'epoch': 3} {'type': 'loss', 'content': 0.034962356090545654, 'timestamp': '2025-09-30 22:35:31.037529', 'step': 18915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:31.074310', 'step': 18915, 'epoch': 3} {'type': 'loss', 'content': 0.11011636257171631, 'timestamp': '2025-09-30 22:35:31.107783', 'step': 18916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:31.139348', 'step': 18916, 'epoch': 3} {'type': 'loss', 'content': 0.0193476602435112, 'timestamp': '2025-09-30 22:35:31.145150', 'step': 18917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:31.176353', 'step': 18917, 'epoch': 3} {'type': 'loss', 'content': 0.10347363352775574, 'timestamp': '2025-09-30 22:35:31.186740', 'step': 18918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:31.221763', 'step': 18918, 'epoch': 3} {'type': 'loss', 'content': 0.06613451987504959, 'timestamp': '2025-09-30 22:35:31.224272', 'step': 18919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:31.260510', 'step': 18919, 'epoch': 3} {'type': 'loss', 'content': 0.15034101903438568, 'timestamp': '2025-09-30 22:35:31.287515', 'step': 18920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:31.324646', 'step': 18920, 'epoch': 3} {'type': 'loss', 'content': 0.02680143341422081, 'timestamp': '2025-09-30 22:35:31.329907', 'step': 18921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:31.362471', 'step': 18921, 'epoch': 3} {'type': 'loss', 'content': 0.01527654379606247, 'timestamp': '2025-09-30 22:35:31.365028', 'step': 18922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:31.402482', 'step': 18922, 'epoch': 3} {'type': 'loss', 'content': 0.04901493713259697, 'timestamp': '2025-09-30 22:35:31.412596', 'step': 18923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:31.446577', 'step': 18923, 'epoch': 3} {'type': 'loss', 'content': 0.030932852998375893, 'timestamp': '2025-09-30 22:35:31.477077', 'step': 18924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:31.509905', 'step': 18924, 'epoch': 3} {'type': 'loss', 'content': 0.15960580110549927, 'timestamp': '2025-09-30 22:35:31.513304', 'step': 18925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:31.546346', 'step': 18925, 'epoch': 3} {'type': 'loss', 'content': 0.08630672097206116, 'timestamp': '2025-09-30 22:35:31.555556', 'step': 18926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:31.592820', 'step': 18926, 'epoch': 3} {'type': 'loss', 'content': 0.030265184119343758, 'timestamp': '2025-09-30 22:35:31.596238', 'step': 18927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:31.634638', 'step': 18927, 'epoch': 3} {'type': 'loss', 'content': 0.06854493916034698, 'timestamp': '2025-09-30 22:35:31.660847', 'step': 18928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:31.698262', 'step': 18928, 'epoch': 3} {'type': 'loss', 'content': 0.13331538438796997, 'timestamp': '2025-09-30 22:35:31.702834', 'step': 18929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:31.741097', 'step': 18929, 'epoch': 3} {'type': 'loss', 'content': 0.06023195758461952, 'timestamp': '2025-09-30 22:35:31.746274', 'step': 18930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:31.805732', 'step': 18930, 'epoch': 3} {'type': 'loss', 'content': 0.034584272652864456, 'timestamp': '2025-09-30 22:35:31.819392', 'step': 18931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:31.854881', 'step': 18931, 'epoch': 3} {'type': 'loss', 'content': 0.06853482127189636, 'timestamp': '2025-09-30 22:35:31.893776', 'step': 18932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:31.927620', 'step': 18932, 'epoch': 3} {'type': 'loss', 'content': 0.11164785176515579, 'timestamp': '2025-09-30 22:35:31.939384', 'step': 18933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:31.991635', 'step': 18933, 'epoch': 3} {'type': 'loss', 'content': 0.09827262908220291, 'timestamp': '2025-09-30 22:35:31.998494', 'step': 18934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:32.038100', 'step': 18934, 'epoch': 3} {'type': 'loss', 'content': 0.07207466661930084, 'timestamp': '2025-09-30 22:35:32.053694', 'step': 18935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:32.086056', 'step': 18935, 'epoch': 3} {'type': 'loss', 'content': 0.05615026503801346, 'timestamp': '2025-09-30 22:35:32.115242', 'step': 18936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.150394', 'step': 18936, 'epoch': 3} {'type': 'loss', 'content': 0.06950414180755615, 'timestamp': '2025-09-30 22:35:32.156489', 'step': 18937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:32.198682', 'step': 18937, 'epoch': 3} {'type': 'loss', 'content': 0.08285488933324814, 'timestamp': '2025-09-30 22:35:32.211660', 'step': 18938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.252929', 'step': 18938, 'epoch': 3} {'type': 'loss', 'content': 0.11934105306863785, 'timestamp': '2025-09-30 22:35:32.257187', 'step': 18939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.289392', 'step': 18939, 'epoch': 3} {'type': 'loss', 'content': 0.017754651606082916, 'timestamp': '2025-09-30 22:35:32.325103', 'step': 18940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:32.357572', 'step': 18940, 'epoch': 3} {'type': 'loss', 'content': 0.09023665636777878, 'timestamp': '2025-09-30 22:35:32.399726', 'step': 18941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:32.445201', 'step': 18941, 'epoch': 3} {'type': 'loss', 'content': 0.13875441253185272, 'timestamp': '2025-09-30 22:35:32.458642', 'step': 18942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.497410', 'step': 18942, 'epoch': 3} {'type': 'loss', 'content': 0.07805424928665161, 'timestamp': '2025-09-30 22:35:32.504980', 'step': 18943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:32.542166', 'step': 18943, 'epoch': 3} {'type': 'loss', 'content': 0.06262414902448654, 'timestamp': '2025-09-30 22:35:32.583750', 'step': 18944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.620828', 'step': 18944, 'epoch': 3} {'type': 'loss', 'content': 0.06659110635519028, 'timestamp': '2025-09-30 22:35:32.625422', 'step': 18945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:32.660233', 'step': 18945, 'epoch': 3} {'type': 'loss', 'content': 0.08589626103639603, 'timestamp': '2025-09-30 22:35:32.694531', 'step': 18946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.734168', 'step': 18946, 'epoch': 3} {'type': 'loss', 'content': 0.006847895681858063, 'timestamp': '2025-09-30 22:35:32.746228', 'step': 18947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:32.799494', 'step': 18947, 'epoch': 3} {'type': 'loss', 'content': 0.051250215619802475, 'timestamp': '2025-09-30 22:35:32.844383', 'step': 18948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:32.890417', 'step': 18948, 'epoch': 3} {'type': 'loss', 'content': 0.06011339649558067, 'timestamp': '2025-09-30 22:35:32.905880', 'step': 18949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:32.955977', 'step': 18949, 'epoch': 3} {'type': 'loss', 'content': 0.04787950590252876, 'timestamp': '2025-09-30 22:35:32.972206', 'step': 18950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:33.011567', 'step': 18950, 'epoch': 3} {'type': 'loss', 'content': 0.06529723107814789, 'timestamp': '2025-09-30 22:35:33.032415', 'step': 18951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:33.077846', 'step': 18951, 'epoch': 3} {'type': 'loss', 'content': 0.07213655114173889, 'timestamp': '2025-09-30 22:35:33.115747', 'step': 18952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:33.154822', 'step': 18952, 'epoch': 3} {'type': 'loss', 'content': 0.09667500853538513, 'timestamp': '2025-09-30 22:35:33.168704', 'step': 18953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:33.207050', 'step': 18953, 'epoch': 3} {'type': 'loss', 'content': 0.04508960619568825, 'timestamp': '2025-09-30 22:35:33.213804', 'step': 18954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.251185', 'step': 18954, 'epoch': 3} {'type': 'loss', 'content': 0.11069698631763458, 'timestamp': '2025-09-30 22:35:33.261029', 'step': 18955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:33.302665', 'step': 18955, 'epoch': 3} {'type': 'loss', 'content': 0.07393135875463486, 'timestamp': '2025-09-30 22:35:33.328913', 'step': 18956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.381906', 'step': 18956, 'epoch': 3} {'type': 'loss', 'content': 0.049078475683927536, 'timestamp': '2025-09-30 22:35:33.391730', 'step': 18957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.442196', 'step': 18957, 'epoch': 3} {'type': 'loss', 'content': 0.031537968665361404, 'timestamp': '2025-09-30 22:35:33.466585', 'step': 18958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.513552', 'step': 18958, 'epoch': 3} {'type': 'loss', 'content': 0.07493762671947479, 'timestamp': '2025-09-30 22:35:33.520464', 'step': 18959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:33.576493', 'step': 18959, 'epoch': 3} {'type': 'loss', 'content': 0.03597569838166237, 'timestamp': '2025-09-30 22:35:33.614467', 'step': 18960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.647836', 'step': 18960, 'epoch': 3} {'type': 'loss', 'content': 0.11901325732469559, 'timestamp': '2025-09-30 22:35:33.659255', 'step': 18961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.696935', 'step': 18961, 'epoch': 3} {'type': 'loss', 'content': 0.0782393142580986, 'timestamp': '2025-09-30 22:35:33.714956', 'step': 18962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.779267', 'step': 18962, 'epoch': 3} {'type': 'loss', 'content': 0.06805549561977386, 'timestamp': '2025-09-30 22:35:33.794842', 'step': 18963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:33.829481', 'step': 18963, 'epoch': 3} {'type': 'loss', 'content': 0.05325363203883171, 'timestamp': '2025-09-30 22:35:33.884377', 'step': 18964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:33.918103', 'step': 18964, 'epoch': 3} {'type': 'loss', 'content': 0.0669168010354042, 'timestamp': '2025-09-30 22:35:33.932039', 'step': 18965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:33.974816', 'step': 18965, 'epoch': 3} {'type': 'loss', 'content': 0.03822314366698265, 'timestamp': '2025-09-30 22:35:33.987996', 'step': 18966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:34.028161', 'step': 18966, 'epoch': 3} {'type': 'loss', 'content': 0.04087692126631737, 'timestamp': '2025-09-30 22:35:34.036361', 'step': 18967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:34.066354', 'step': 18967, 'epoch': 3} {'type': 'loss', 'content': 0.07309757173061371, 'timestamp': '2025-09-30 22:35:34.101315', 'step': 18968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.138436', 'step': 18968, 'epoch': 3} {'type': 'loss', 'content': 0.03491797298192978, 'timestamp': '2025-09-30 22:35:34.148880', 'step': 18969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.203578', 'step': 18969, 'epoch': 3} {'type': 'loss', 'content': 0.1245700865983963, 'timestamp': '2025-09-30 22:35:34.237255', 'step': 18970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.280392', 'step': 18970, 'epoch': 3} {'type': 'loss', 'content': 0.037670738995075226, 'timestamp': '2025-09-30 22:35:34.283739', 'step': 18971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:34.320056', 'step': 18971, 'epoch': 3} {'type': 'loss', 'content': 0.09872771799564362, 'timestamp': '2025-09-30 22:35:34.353150', 'step': 18972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:34.388477', 'step': 18972, 'epoch': 3} {'type': 'loss', 'content': 0.021175825968384743, 'timestamp': '2025-09-30 22:35:34.396468', 'step': 18973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.440276', 'step': 18973, 'epoch': 3} {'type': 'loss', 'content': 0.07907716184854507, 'timestamp': '2025-09-30 22:35:34.445759', 'step': 18974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:34.477883', 'step': 18974, 'epoch': 3} {'type': 'loss', 'content': 0.05853358656167984, 'timestamp': '2025-09-30 22:35:34.483716', 'step': 18975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.518001', 'step': 18975, 'epoch': 3} {'type': 'loss', 'content': 0.0374930165708065, 'timestamp': '2025-09-30 22:35:34.549185', 'step': 18976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.586408', 'step': 18976, 'epoch': 3} {'type': 'loss', 'content': 0.11154603213071823, 'timestamp': '2025-09-30 22:35:34.594241', 'step': 18977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.633204', 'step': 18977, 'epoch': 3} {'type': 'loss', 'content': 0.052874788641929626, 'timestamp': '2025-09-30 22:35:34.636904', 'step': 18978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:34.669570', 'step': 18978, 'epoch': 3} {'type': 'loss', 'content': 0.08878689259290695, 'timestamp': '2025-09-30 22:35:34.672258', 'step': 18979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.718155', 'step': 18979, 'epoch': 3} {'type': 'loss', 'content': 0.1016760915517807, 'timestamp': '2025-09-30 22:35:34.748106', 'step': 18980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:34.778720', 'step': 18980, 'epoch': 3} {'type': 'loss', 'content': 0.07549677044153214, 'timestamp': '2025-09-30 22:35:34.782792', 'step': 18981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:34.813770', 'step': 18981, 'epoch': 3} {'type': 'loss', 'content': 0.10770309716463089, 'timestamp': '2025-09-30 22:35:34.816357', 'step': 18982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.851183', 'step': 18982, 'epoch': 3} {'type': 'loss', 'content': 0.03892778605222702, 'timestamp': '2025-09-30 22:35:34.858396', 'step': 18983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:34.893656', 'step': 18983, 'epoch': 3} {'type': 'loss', 'content': 0.06471662223339081, 'timestamp': '2025-09-30 22:35:34.920739', 'step': 18984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.957495', 'step': 18984, 'epoch': 3} {'type': 'loss', 'content': 0.06528301537036896, 'timestamp': '2025-09-30 22:35:34.960215', 'step': 18985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:34.994439', 'step': 18985, 'epoch': 3} {'type': 'loss', 'content': 0.03959082439541817, 'timestamp': '2025-09-30 22:35:34.998840', 'step': 18986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:35.029765', 'step': 18986, 'epoch': 3} {'type': 'loss', 'content': 0.06782090663909912, 'timestamp': '2025-09-30 22:35:35.032290', 'step': 18987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.063838', 'step': 18987, 'epoch': 3} {'type': 'loss', 'content': 0.024891600012779236, 'timestamp': '2025-09-30 22:35:35.089128', 'step': 18988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:35.122182', 'step': 18988, 'epoch': 3} {'type': 'loss', 'content': 0.08562450855970383, 'timestamp': '2025-09-30 22:35:35.126010', 'step': 18989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:35.157519', 'step': 18989, 'epoch': 3} {'type': 'loss', 'content': 0.04731149598956108, 'timestamp': '2025-09-30 22:35:35.161406', 'step': 18990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.194563', 'step': 18990, 'epoch': 3} {'type': 'loss', 'content': 0.025024080649018288, 'timestamp': '2025-09-30 22:35:35.198381', 'step': 18991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.228730', 'step': 18991, 'epoch': 3} {'type': 'loss', 'content': 0.09295575320720673, 'timestamp': '2025-09-30 22:35:35.253604', 'step': 18992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.292683', 'step': 18992, 'epoch': 3} {'type': 'loss', 'content': 0.043526697903871536, 'timestamp': '2025-09-30 22:35:35.299053', 'step': 18993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:35.333233', 'step': 18993, 'epoch': 3} {'type': 'loss', 'content': 0.010653078556060791, 'timestamp': '2025-09-30 22:35:35.341483', 'step': 18994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:35.372395', 'step': 18994, 'epoch': 3} {'type': 'loss', 'content': 0.043370701372623444, 'timestamp': '2025-09-30 22:35:35.376439', 'step': 18995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:35.410292', 'step': 18995, 'epoch': 3} {'type': 'loss', 'content': 0.06749740242958069, 'timestamp': '2025-09-30 22:35:35.441074', 'step': 18996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:35.473527', 'step': 18996, 'epoch': 3} {'type': 'loss', 'content': 0.06794920563697815, 'timestamp': '2025-09-30 22:35:35.476471', 'step': 18997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.514874', 'step': 18997, 'epoch': 3} {'type': 'loss', 'content': 0.06026817113161087, 'timestamp': '2025-09-30 22:35:35.523062', 'step': 18998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.560212', 'step': 18998, 'epoch': 3} {'type': 'loss', 'content': 0.018421398475766182, 'timestamp': '2025-09-30 22:35:35.568014', 'step': 18999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:35.610323', 'step': 18999, 'epoch': 3} {'type': 'loss', 'content': 0.08323012292385101, 'timestamp': '2025-09-30 22:35:35.636238', 'step': 19000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19000', 'timestamp': '2025-09-30 22:35:40.921409', 'step': 19000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:40.953281', 'step': 19000, 'epoch': 3} {'type': 'loss', 'content': 0.06772951781749725, 'timestamp': '2025-09-30 22:35:40.958032', 'step': 19001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:40.993136', 'step': 19001, 'epoch': 3} {'type': 'loss', 'content': 0.09897293150424957, 'timestamp': '2025-09-30 22:35:40.998352', 'step': 19002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.032618', 'step': 19002, 'epoch': 3} {'type': 'loss', 'content': 0.046518474817276, 'timestamp': '2025-09-30 22:35:41.047116', 'step': 19003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:41.088966', 'step': 19003, 'epoch': 3} {'type': 'loss', 'content': 0.054758165031671524, 'timestamp': '2025-09-30 22:35:41.113318', 'step': 19004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:41.148615', 'step': 19004, 'epoch': 3} {'type': 'loss', 'content': 0.07692410796880722, 'timestamp': '2025-09-30 22:35:41.152165', 'step': 19005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.184052', 'step': 19005, 'epoch': 3} {'type': 'loss', 'content': 0.06610869616270065, 'timestamp': '2025-09-30 22:35:41.190346', 'step': 19006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:41.224067', 'step': 19006, 'epoch': 3} {'type': 'loss', 'content': 0.1356462985277176, 'timestamp': '2025-09-30 22:35:41.227783', 'step': 19007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:41.270064', 'step': 19007, 'epoch': 3} {'type': 'loss', 'content': 0.03143290430307388, 'timestamp': '2025-09-30 22:35:41.294467', 'step': 19008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.334323', 'step': 19008, 'epoch': 3} {'type': 'loss', 'content': 0.030176086351275444, 'timestamp': '2025-09-30 22:35:41.339470', 'step': 19009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:41.378661', 'step': 19009, 'epoch': 3} {'type': 'loss', 'content': 0.011020829901099205, 'timestamp': '2025-09-30 22:35:41.396511', 'step': 19010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:41.432394', 'step': 19010, 'epoch': 3} {'type': 'loss', 'content': 0.010174253024160862, 'timestamp': '2025-09-30 22:35:41.438359', 'step': 19011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:41.469952', 'step': 19011, 'epoch': 3} {'type': 'loss', 'content': 0.03561175614595413, 'timestamp': '2025-09-30 22:35:41.498906', 'step': 19012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:41.537036', 'step': 19012, 'epoch': 3} {'type': 'loss', 'content': 0.024900825694203377, 'timestamp': '2025-09-30 22:35:41.542274', 'step': 19013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.575590', 'step': 19013, 'epoch': 3} {'type': 'loss', 'content': 0.0426730141043663, 'timestamp': '2025-09-30 22:35:41.579556', 'step': 19014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:41.609767', 'step': 19014, 'epoch': 3} {'type': 'loss', 'content': 0.042801324278116226, 'timestamp': '2025-09-30 22:35:41.612558', 'step': 19015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.646618', 'step': 19015, 'epoch': 3} {'type': 'loss', 'content': 0.09770210832357407, 'timestamp': '2025-09-30 22:35:41.671417', 'step': 19016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.703311', 'step': 19016, 'epoch': 3} {'type': 'loss', 'content': 0.06865020841360092, 'timestamp': '2025-09-30 22:35:41.706276', 'step': 19017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:41.740981', 'step': 19017, 'epoch': 3} {'type': 'loss', 'content': 0.087628573179245, 'timestamp': '2025-09-30 22:35:41.744958', 'step': 19018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.777565', 'step': 19018, 'epoch': 3} {'type': 'loss', 'content': 0.12349636107683182, 'timestamp': '2025-09-30 22:35:41.781286', 'step': 19019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.812913', 'step': 19019, 'epoch': 3} {'type': 'loss', 'content': 0.03944748267531395, 'timestamp': '2025-09-30 22:35:41.840873', 'step': 19020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.873780', 'step': 19020, 'epoch': 3} {'type': 'loss', 'content': 0.0286774393171072, 'timestamp': '2025-09-30 22:35:41.876463', 'step': 19021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:41.907575', 'step': 19021, 'epoch': 3} {'type': 'loss', 'content': 0.053957656025886536, 'timestamp': '2025-09-30 22:35:41.912257', 'step': 19022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:41.946502', 'step': 19022, 'epoch': 3} {'type': 'loss', 'content': 0.07761888951063156, 'timestamp': '2025-09-30 22:35:41.957922', 'step': 19023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:41.992424', 'step': 19023, 'epoch': 3} {'type': 'loss', 'content': 0.04675200581550598, 'timestamp': '2025-09-30 22:35:42.018253', 'step': 19024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:42.050056', 'step': 19024, 'epoch': 3} {'type': 'loss', 'content': 0.07140418142080307, 'timestamp': '2025-09-30 22:35:42.053486', 'step': 19025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:42.086286', 'step': 19025, 'epoch': 3} {'type': 'loss', 'content': 0.09910103678703308, 'timestamp': '2025-09-30 22:35:42.090899', 'step': 19026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:42.124786', 'step': 19026, 'epoch': 3} {'type': 'loss', 'content': 0.10250528901815414, 'timestamp': '2025-09-30 22:35:42.129541', 'step': 19027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.171754', 'step': 19027, 'epoch': 3} {'type': 'loss', 'content': 0.05148984119296074, 'timestamp': '2025-09-30 22:35:42.201642', 'step': 19028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:42.238367', 'step': 19028, 'epoch': 3} {'type': 'loss', 'content': 0.042045656591653824, 'timestamp': '2025-09-30 22:35:42.245521', 'step': 19029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:42.284743', 'step': 19029, 'epoch': 3} {'type': 'loss', 'content': 0.047449786216020584, 'timestamp': '2025-09-30 22:35:42.292052', 'step': 19030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:42.327308', 'step': 19030, 'epoch': 3} {'type': 'loss', 'content': 0.10233710706233978, 'timestamp': '2025-09-30 22:35:42.334345', 'step': 19031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:42.379157', 'step': 19031, 'epoch': 3} {'type': 'loss', 'content': 0.024706080555915833, 'timestamp': '2025-09-30 22:35:42.408243', 'step': 19032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.440519', 'step': 19032, 'epoch': 3} {'type': 'loss', 'content': 0.06631864607334137, 'timestamp': '2025-09-30 22:35:42.444733', 'step': 19033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:42.475797', 'step': 19033, 'epoch': 3} {'type': 'loss', 'content': 0.05628698319196701, 'timestamp': '2025-09-30 22:35:42.478602', 'step': 19034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:42.509205', 'step': 19034, 'epoch': 3} {'type': 'loss', 'content': 0.03563190996646881, 'timestamp': '2025-09-30 22:35:42.511725', 'step': 19035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:42.541807', 'step': 19035, 'epoch': 3} {'type': 'loss', 'content': 0.009454887360334396, 'timestamp': '2025-09-30 22:35:42.567990', 'step': 19036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:42.599862', 'step': 19036, 'epoch': 3} {'type': 'loss', 'content': 0.09472675621509552, 'timestamp': '2025-09-30 22:35:42.604729', 'step': 19037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:42.636825', 'step': 19037, 'epoch': 3} {'type': 'loss', 'content': 0.017673995345830917, 'timestamp': '2025-09-30 22:35:42.640559', 'step': 19038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.675403', 'step': 19038, 'epoch': 3} {'type': 'loss', 'content': 0.09650134295225143, 'timestamp': '2025-09-30 22:35:42.680728', 'step': 19039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.712697', 'step': 19039, 'epoch': 3} {'type': 'loss', 'content': 0.01966550573706627, 'timestamp': '2025-09-30 22:35:42.736519', 'step': 19040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.768439', 'step': 19040, 'epoch': 3} {'type': 'loss', 'content': 0.09072625637054443, 'timestamp': '2025-09-30 22:35:42.770679', 'step': 19041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:42.801227', 'step': 19041, 'epoch': 3} {'type': 'loss', 'content': 0.038959212601184845, 'timestamp': '2025-09-30 22:35:42.805299', 'step': 19042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.836043', 'step': 19042, 'epoch': 3} {'type': 'loss', 'content': 0.035335518419742584, 'timestamp': '2025-09-30 22:35:42.840017', 'step': 19043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.873308', 'step': 19043, 'epoch': 3} {'type': 'loss', 'content': 0.023701248690485954, 'timestamp': '2025-09-30 22:35:42.899108', 'step': 19044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:42.929583', 'step': 19044, 'epoch': 3} {'type': 'loss', 'content': 0.08709585666656494, 'timestamp': '2025-09-30 22:35:42.934933', 'step': 19045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:42.965299', 'step': 19045, 'epoch': 3} {'type': 'loss', 'content': 0.05211058259010315, 'timestamp': '2025-09-30 22:35:42.968536', 'step': 19046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.000356', 'step': 19046, 'epoch': 3} {'type': 'loss', 'content': 0.04253154247999191, 'timestamp': '2025-09-30 22:35:43.005265', 'step': 19047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:43.045156', 'step': 19047, 'epoch': 3} {'type': 'loss', 'content': 0.020199963822960854, 'timestamp': '2025-09-30 22:35:43.068806', 'step': 19048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:43.098766', 'step': 19048, 'epoch': 3} {'type': 'loss', 'content': 0.01899881847202778, 'timestamp': '2025-09-30 22:35:43.102252', 'step': 19049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.133947', 'step': 19049, 'epoch': 3} {'type': 'loss', 'content': 0.0462200902402401, 'timestamp': '2025-09-30 22:35:43.137380', 'step': 19050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.169525', 'step': 19050, 'epoch': 3} {'type': 'loss', 'content': 0.036345455795526505, 'timestamp': '2025-09-30 22:35:43.172647', 'step': 19051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:43.203880', 'step': 19051, 'epoch': 3} {'type': 'loss', 'content': 0.029269197955727577, 'timestamp': '2025-09-30 22:35:43.229086', 'step': 19052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:43.259796', 'step': 19052, 'epoch': 3} {'type': 'loss', 'content': 0.07885675132274628, 'timestamp': '2025-09-30 22:35:43.263938', 'step': 19053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:43.295276', 'step': 19053, 'epoch': 3} {'type': 'loss', 'content': 0.12010249495506287, 'timestamp': '2025-09-30 22:35:43.298831', 'step': 19054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.331857', 'step': 19054, 'epoch': 3} {'type': 'loss', 'content': 0.04502464830875397, 'timestamp': '2025-09-30 22:35:43.334598', 'step': 19055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.367409', 'step': 19055, 'epoch': 3} {'type': 'loss', 'content': 0.08571130037307739, 'timestamp': '2025-09-30 22:35:43.392486', 'step': 19056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.422228', 'step': 19056, 'epoch': 3} {'type': 'loss', 'content': 0.06918687373399734, 'timestamp': '2025-09-30 22:35:43.425489', 'step': 19057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:43.456604', 'step': 19057, 'epoch': 3} {'type': 'loss', 'content': 0.09986917674541473, 'timestamp': '2025-09-30 22:35:43.459679', 'step': 19058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.490840', 'step': 19058, 'epoch': 3} {'type': 'loss', 'content': 0.10317205637693405, 'timestamp': '2025-09-30 22:35:43.494002', 'step': 19059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.524610', 'step': 19059, 'epoch': 3} {'type': 'loss', 'content': 0.041363757103681564, 'timestamp': '2025-09-30 22:35:43.549330', 'step': 19060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.581685', 'step': 19060, 'epoch': 3} {'type': 'loss', 'content': 0.006905559450387955, 'timestamp': '2025-09-30 22:35:43.585118', 'step': 19061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.616926', 'step': 19061, 'epoch': 3} {'type': 'loss', 'content': 0.011550258845090866, 'timestamp': '2025-09-30 22:35:43.620474', 'step': 19062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.651638', 'step': 19062, 'epoch': 3} {'type': 'loss', 'content': 0.0375627838075161, 'timestamp': '2025-09-30 22:35:43.654754', 'step': 19063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.686163', 'step': 19063, 'epoch': 3} {'type': 'loss', 'content': 0.04080062359571457, 'timestamp': '2025-09-30 22:35:43.710720', 'step': 19064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.742439', 'step': 19064, 'epoch': 3} {'type': 'loss', 'content': 0.05637478828430176, 'timestamp': '2025-09-30 22:35:43.745601', 'step': 19065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.776687', 'step': 19065, 'epoch': 3} {'type': 'loss', 'content': 0.09139841049909592, 'timestamp': '2025-09-30 22:35:43.780514', 'step': 19066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:43.812514', 'step': 19066, 'epoch': 3} {'type': 'loss', 'content': 0.05634975805878639, 'timestamp': '2025-09-30 22:35:43.815538', 'step': 19067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:43.846385', 'step': 19067, 'epoch': 3} {'type': 'loss', 'content': 0.048154741525650024, 'timestamp': '2025-09-30 22:35:43.870495', 'step': 19068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:43.903086', 'step': 19068, 'epoch': 3} {'type': 'loss', 'content': 0.035828378051519394, 'timestamp': '2025-09-30 22:35:43.905463', 'step': 19069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:43.935323', 'step': 19069, 'epoch': 3} {'type': 'loss', 'content': 0.03296379745006561, 'timestamp': '2025-09-30 22:35:43.939361', 'step': 19070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:43.970934', 'step': 19070, 'epoch': 3} {'type': 'loss', 'content': 0.04460233077406883, 'timestamp': '2025-09-30 22:35:43.974300', 'step': 19071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:44.005992', 'step': 19071, 'epoch': 3} {'type': 'loss', 'content': 0.021278200671076775, 'timestamp': '2025-09-30 22:35:44.032729', 'step': 19072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:44.064172', 'step': 19072, 'epoch': 3} {'type': 'loss', 'content': 0.07369697839021683, 'timestamp': '2025-09-30 22:35:44.074007', 'step': 19073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:44.119043', 'step': 19073, 'epoch': 3} {'type': 'loss', 'content': 0.05280066654086113, 'timestamp': '2025-09-30 22:35:44.121669', 'step': 19074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.155444', 'step': 19074, 'epoch': 3} {'type': 'loss', 'content': 0.09269372373819351, 'timestamp': '2025-09-30 22:35:44.159134', 'step': 19075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.188996', 'step': 19075, 'epoch': 3} {'type': 'loss', 'content': 0.026905952021479607, 'timestamp': '2025-09-30 22:35:44.215179', 'step': 19076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:44.247166', 'step': 19076, 'epoch': 3} {'type': 'loss', 'content': 0.09171774238348007, 'timestamp': '2025-09-30 22:35:44.249437', 'step': 19077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:44.279626', 'step': 19077, 'epoch': 3} {'type': 'loss', 'content': 0.07754532992839813, 'timestamp': '2025-09-30 22:35:44.283493', 'step': 19078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.325343', 'step': 19078, 'epoch': 3} {'type': 'loss', 'content': 0.07174408435821533, 'timestamp': '2025-09-30 22:35:44.328228', 'step': 19079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.361333', 'step': 19079, 'epoch': 3} {'type': 'loss', 'content': 0.08753173798322678, 'timestamp': '2025-09-30 22:35:44.386761', 'step': 19080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.418262', 'step': 19080, 'epoch': 3} {'type': 'loss', 'content': 0.04808541387319565, 'timestamp': '2025-09-30 22:35:44.423705', 'step': 19081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:44.457038', 'step': 19081, 'epoch': 3} {'type': 'loss', 'content': 0.08770095556974411, 'timestamp': '2025-09-30 22:35:44.463209', 'step': 19082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.495158', 'step': 19082, 'epoch': 3} {'type': 'loss', 'content': 0.07593923807144165, 'timestamp': '2025-09-30 22:35:44.501244', 'step': 19083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:44.533617', 'step': 19083, 'epoch': 3} {'type': 'loss', 'content': 0.020327020436525345, 'timestamp': '2025-09-30 22:35:44.562076', 'step': 19084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.592747', 'step': 19084, 'epoch': 3} {'type': 'loss', 'content': 0.056947797536849976, 'timestamp': '2025-09-30 22:35:44.597128', 'step': 19085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.637267', 'step': 19085, 'epoch': 3} {'type': 'loss', 'content': 0.05122404173016548, 'timestamp': '2025-09-30 22:35:44.641642', 'step': 19086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.673945', 'step': 19086, 'epoch': 3} {'type': 'loss', 'content': 0.08796567469835281, 'timestamp': '2025-09-30 22:35:44.685365', 'step': 19087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:44.725210', 'step': 19087, 'epoch': 3} {'type': 'loss', 'content': 0.029336046427488327, 'timestamp': '2025-09-30 22:35:44.753549', 'step': 19088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:44.785654', 'step': 19088, 'epoch': 3} {'type': 'loss', 'content': 0.02722523920238018, 'timestamp': '2025-09-30 22:35:44.789754', 'step': 19089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:44.820863', 'step': 19089, 'epoch': 3} {'type': 'loss', 'content': 0.07764153927564621, 'timestamp': '2025-09-30 22:35:44.828899', 'step': 19090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:44.861564', 'step': 19090, 'epoch': 3} {'type': 'loss', 'content': 0.009746067225933075, 'timestamp': '2025-09-30 22:35:44.864804', 'step': 19091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:44.895928', 'step': 19091, 'epoch': 3} {'type': 'loss', 'content': 0.04869953170418739, 'timestamp': '2025-09-30 22:35:44.933589', 'step': 19092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:44.971801', 'step': 19092, 'epoch': 3} {'type': 'loss', 'content': 0.06712891161441803, 'timestamp': '2025-09-30 22:35:44.978025', 'step': 19093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:45.010020', 'step': 19093, 'epoch': 3} {'type': 'loss', 'content': 0.03425817936658859, 'timestamp': '2025-09-30 22:35:45.013042', 'step': 19094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:45.045767', 'step': 19094, 'epoch': 3} {'type': 'loss', 'content': 0.050199009478092194, 'timestamp': '2025-09-30 22:35:45.050998', 'step': 19095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:45.081059', 'step': 19095, 'epoch': 3} {'type': 'loss', 'content': 0.06928353756666183, 'timestamp': '2025-09-30 22:35:45.109265', 'step': 19096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:45.142332', 'step': 19096, 'epoch': 3} {'type': 'loss', 'content': 0.05046693980693817, 'timestamp': '2025-09-30 22:35:45.147396', 'step': 19097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:45.184603', 'step': 19097, 'epoch': 3} {'type': 'loss', 'content': 0.05748167261481285, 'timestamp': '2025-09-30 22:35:45.187595', 'step': 19098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:45.221419', 'step': 19098, 'epoch': 3} {'type': 'loss', 'content': 0.08239802718162537, 'timestamp': '2025-09-30 22:35:45.226570', 'step': 19099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:45.259470', 'step': 19099, 'epoch': 3} {'type': 'loss', 'content': 0.05744187533855438, 'timestamp': '2025-09-30 22:35:45.284139', 'step': 19100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:45.315161', 'step': 19100, 'epoch': 3} {'type': 'loss', 'content': 0.05884796008467674, 'timestamp': '2025-09-30 22:35:45.322710', 'step': 19101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:45.353043', 'step': 19101, 'epoch': 3} {'type': 'loss', 'content': 0.047769226133823395, 'timestamp': '2025-09-30 22:35:45.362741', 'step': 19102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:45.399224', 'step': 19102, 'epoch': 3} {'type': 'loss', 'content': 0.06462027877569199, 'timestamp': '2025-09-30 22:35:45.413626', 'step': 19103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:45.446848', 'step': 19103, 'epoch': 3} {'type': 'loss', 'content': 0.03655034676194191, 'timestamp': '2025-09-30 22:35:45.474454', 'step': 19104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:45.508160', 'step': 19104, 'epoch': 3} {'type': 'loss', 'content': 0.0754813402891159, 'timestamp': '2025-09-30 22:35:45.512105', 'step': 19105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:45.545797', 'step': 19105, 'epoch': 3} {'type': 'loss', 'content': 0.05395087972283363, 'timestamp': '2025-09-30 22:35:45.552055', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:35:53.309664', 'step': 19106, 'epoch': 3} {'type': 'pplx', 'content': 13901.652037146036, 'timestamp': '2025-09-30 22:35:53.317479', 'step': 19106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.355145', 'step': 19106, 'epoch': 3} {'type': 'loss', 'content': 0.024904794991016388, 'timestamp': '2025-09-30 22:35:53.371530', 'step': 19107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.416268', 'step': 19107, 'epoch': 3} {'type': 'loss', 'content': 0.05195916071534157, 'timestamp': '2025-09-30 22:35:53.442630', 'step': 19108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:53.477073', 'step': 19108, 'epoch': 3} {'type': 'loss', 'content': 0.008151563815772533, 'timestamp': '2025-09-30 22:35:53.489418', 'step': 19109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:53.521897', 'step': 19109, 'epoch': 3} {'type': 'loss', 'content': 0.10589557886123657, 'timestamp': '2025-09-30 22:35:53.527730', 'step': 19110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.562719', 'step': 19110, 'epoch': 3} {'type': 'loss', 'content': 0.05833750218153, 'timestamp': '2025-09-30 22:35:53.568141', 'step': 19111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.603941', 'step': 19111, 'epoch': 3} {'type': 'loss', 'content': 0.1077595204114914, 'timestamp': '2025-09-30 22:35:53.642332', 'step': 19112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:53.675154', 'step': 19112, 'epoch': 3} {'type': 'loss', 'content': 0.09382542222738266, 'timestamp': '2025-09-30 22:35:53.679871', 'step': 19113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:53.718640', 'step': 19113, 'epoch': 3} {'type': 'loss', 'content': 0.13923199474811554, 'timestamp': '2025-09-30 22:35:53.725365', 'step': 19114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:53.756762', 'step': 19114, 'epoch': 3} {'type': 'loss', 'content': 0.040690645575523376, 'timestamp': '2025-09-30 22:35:53.761338', 'step': 19115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:53.803165', 'step': 19115, 'epoch': 3} {'type': 'loss', 'content': 0.06958317011594772, 'timestamp': '2025-09-30 22:35:53.828425', 'step': 19116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.860534', 'step': 19116, 'epoch': 3} {'type': 'loss', 'content': 0.03860948607325554, 'timestamp': '2025-09-30 22:35:53.867345', 'step': 19117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.901409', 'step': 19117, 'epoch': 3} {'type': 'loss', 'content': 0.06382130086421967, 'timestamp': '2025-09-30 22:35:53.907471', 'step': 19118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:53.938854', 'step': 19118, 'epoch': 3} {'type': 'loss', 'content': 0.0427604503929615, 'timestamp': '2025-09-30 22:35:53.945576', 'step': 19119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:53.981764', 'step': 19119, 'epoch': 3} {'type': 'loss', 'content': 0.027940113097429276, 'timestamp': '2025-09-30 22:35:54.013696', 'step': 19120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.048197', 'step': 19120, 'epoch': 3} {'type': 'loss', 'content': 0.06520102173089981, 'timestamp': '2025-09-30 22:35:54.059554', 'step': 19121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.089952', 'step': 19121, 'epoch': 3} {'type': 'loss', 'content': 0.06862927228212357, 'timestamp': '2025-09-30 22:35:54.093886', 'step': 19122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.130898', 'step': 19122, 'epoch': 3} {'type': 'loss', 'content': 0.03942432254552841, 'timestamp': '2025-09-30 22:35:54.159149', 'step': 19123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:54.206422', 'step': 19123, 'epoch': 3} {'type': 'loss', 'content': 0.10782936215400696, 'timestamp': '2025-09-30 22:35:54.236142', 'step': 19124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.312162', 'step': 19124, 'epoch': 3} {'type': 'loss', 'content': 0.11935785412788391, 'timestamp': '2025-09-30 22:35:54.328548', 'step': 19125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.359284', 'step': 19125, 'epoch': 3} {'type': 'loss', 'content': 0.08791282027959824, 'timestamp': '2025-09-30 22:35:54.370591', 'step': 19126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.419120', 'step': 19126, 'epoch': 3} {'type': 'loss', 'content': 0.041072726249694824, 'timestamp': '2025-09-30 22:35:54.432867', 'step': 19127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.466343', 'step': 19127, 'epoch': 3} {'type': 'loss', 'content': 0.06108657643198967, 'timestamp': '2025-09-30 22:35:54.504359', 'step': 19128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.538601', 'step': 19128, 'epoch': 3} {'type': 'loss', 'content': 0.08617129921913147, 'timestamp': '2025-09-30 22:35:54.544645', 'step': 19129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.586182', 'step': 19129, 'epoch': 3} {'type': 'loss', 'content': 0.07074437290430069, 'timestamp': '2025-09-30 22:35:54.590114', 'step': 19130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.623700', 'step': 19130, 'epoch': 3} {'type': 'loss', 'content': 0.04733949154615402, 'timestamp': '2025-09-30 22:35:54.630090', 'step': 19131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.675734', 'step': 19131, 'epoch': 3} {'type': 'loss', 'content': 0.14245006442070007, 'timestamp': '2025-09-30 22:35:54.701539', 'step': 19132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.741162', 'step': 19132, 'epoch': 3} {'type': 'loss', 'content': 0.03806959465146065, 'timestamp': '2025-09-30 22:35:54.757679', 'step': 19133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.790709', 'step': 19133, 'epoch': 3} {'type': 'loss', 'content': 0.03016749955713749, 'timestamp': '2025-09-30 22:35:54.794684', 'step': 19134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.826156', 'step': 19134, 'epoch': 3} {'type': 'loss', 'content': 0.06368262320756912, 'timestamp': '2025-09-30 22:35:54.831135', 'step': 19135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.863817', 'step': 19135, 'epoch': 3} {'type': 'loss', 'content': 0.0515991747379303, 'timestamp': '2025-09-30 22:35:54.891732', 'step': 19136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:54.923655', 'step': 19136, 'epoch': 3} {'type': 'loss', 'content': 0.07046828418970108, 'timestamp': '2025-09-30 22:35:54.930710', 'step': 19137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:54.964266', 'step': 19137, 'epoch': 3} {'type': 'loss', 'content': 0.02406642958521843, 'timestamp': '2025-09-30 22:35:54.967923', 'step': 19138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.001256', 'step': 19138, 'epoch': 3} {'type': 'loss', 'content': 0.1173555999994278, 'timestamp': '2025-09-30 22:35:55.011308', 'step': 19139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:55.049598', 'step': 19139, 'epoch': 3} {'type': 'loss', 'content': 0.07355757057666779, 'timestamp': '2025-09-30 22:35:55.075246', 'step': 19140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.112167', 'step': 19140, 'epoch': 3} {'type': 'loss', 'content': 0.08734967559576035, 'timestamp': '2025-09-30 22:35:55.118697', 'step': 19141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.154795', 'step': 19141, 'epoch': 3} {'type': 'loss', 'content': 0.060892585664987564, 'timestamp': '2025-09-30 22:35:55.158099', 'step': 19142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.196855', 'step': 19142, 'epoch': 3} {'type': 'loss', 'content': 0.06759365648031235, 'timestamp': '2025-09-30 22:35:55.199645', 'step': 19143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.235221', 'step': 19143, 'epoch': 3} {'type': 'loss', 'content': 0.07251004129648209, 'timestamp': '2025-09-30 22:35:55.262036', 'step': 19144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.293615', 'step': 19144, 'epoch': 3} {'type': 'loss', 'content': 0.0634666383266449, 'timestamp': '2025-09-30 22:35:55.298280', 'step': 19145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:55.334317', 'step': 19145, 'epoch': 3} {'type': 'loss', 'content': 0.04429240897297859, 'timestamp': '2025-09-30 22:35:55.341961', 'step': 19146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.373164', 'step': 19146, 'epoch': 3} {'type': 'loss', 'content': 0.09387946128845215, 'timestamp': '2025-09-30 22:35:55.377224', 'step': 19147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.410118', 'step': 19147, 'epoch': 3} {'type': 'loss', 'content': 0.09130528569221497, 'timestamp': '2025-09-30 22:35:55.439377', 'step': 19148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:55.478555', 'step': 19148, 'epoch': 3} {'type': 'loss', 'content': 0.03925316035747528, 'timestamp': '2025-09-30 22:35:55.482698', 'step': 19149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:55.515323', 'step': 19149, 'epoch': 3} {'type': 'loss', 'content': 0.05889664590358734, 'timestamp': '2025-09-30 22:35:55.519725', 'step': 19150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:55.551855', 'step': 19150, 'epoch': 3} {'type': 'loss', 'content': 0.01949342153966427, 'timestamp': '2025-09-30 22:35:55.557615', 'step': 19151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.588596', 'step': 19151, 'epoch': 3} {'type': 'loss', 'content': 0.104692742228508, 'timestamp': '2025-09-30 22:35:55.613893', 'step': 19152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.643273', 'step': 19152, 'epoch': 3} {'type': 'loss', 'content': 0.038382213562726974, 'timestamp': '2025-09-30 22:35:55.658978', 'step': 19153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:55.695068', 'step': 19153, 'epoch': 3} {'type': 'loss', 'content': 0.06441988050937653, 'timestamp': '2025-09-30 22:35:55.707162', 'step': 19154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:55.741405', 'step': 19154, 'epoch': 3} {'type': 'loss', 'content': 0.13411845266819, 'timestamp': '2025-09-30 22:35:55.748893', 'step': 19155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.780825', 'step': 19155, 'epoch': 3} {'type': 'loss', 'content': 0.10546761751174927, 'timestamp': '2025-09-30 22:35:55.808130', 'step': 19156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.841166', 'step': 19156, 'epoch': 3} {'type': 'loss', 'content': 0.07646385580301285, 'timestamp': '2025-09-30 22:35:55.846285', 'step': 19157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.879427', 'step': 19157, 'epoch': 3} {'type': 'loss', 'content': 0.057731110602617264, 'timestamp': '2025-09-30 22:35:55.884054', 'step': 19158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.918094', 'step': 19158, 'epoch': 3} {'type': 'loss', 'content': 0.03971606120467186, 'timestamp': '2025-09-30 22:35:55.923339', 'step': 19159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:55.955001', 'step': 19159, 'epoch': 3} {'type': 'loss', 'content': 0.013053803704679012, 'timestamp': '2025-09-30 22:35:55.979814', 'step': 19160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:56.011758', 'step': 19160, 'epoch': 3} {'type': 'loss', 'content': 0.026364631950855255, 'timestamp': '2025-09-30 22:35:56.015162', 'step': 19161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:56.046315', 'step': 19161, 'epoch': 3} {'type': 'loss', 'content': 0.018502408638596535, 'timestamp': '2025-09-30 22:35:56.050355', 'step': 19162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:56.088056', 'step': 19162, 'epoch': 3} {'type': 'loss', 'content': 0.0701533779501915, 'timestamp': '2025-09-30 22:35:56.092613', 'step': 19163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.123977', 'step': 19163, 'epoch': 3} {'type': 'loss', 'content': 0.08433617651462555, 'timestamp': '2025-09-30 22:35:56.150832', 'step': 19164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.183468', 'step': 19164, 'epoch': 3} {'type': 'loss', 'content': 0.0426604188978672, 'timestamp': '2025-09-30 22:35:56.187574', 'step': 19165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.231633', 'step': 19165, 'epoch': 3} {'type': 'loss', 'content': 0.07341720163822174, 'timestamp': '2025-09-30 22:35:56.235447', 'step': 19166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.266937', 'step': 19166, 'epoch': 3} {'type': 'loss', 'content': 0.11324659734964371, 'timestamp': '2025-09-30 22:35:56.271233', 'step': 19167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:56.312893', 'step': 19167, 'epoch': 3} {'type': 'loss', 'content': 0.0993238016963005, 'timestamp': '2025-09-30 22:35:56.342605', 'step': 19168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.373692', 'step': 19168, 'epoch': 3} {'type': 'loss', 'content': 0.11099814623594284, 'timestamp': '2025-09-30 22:35:56.382148', 'step': 19169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:56.415086', 'step': 19169, 'epoch': 3} {'type': 'loss', 'content': 0.12169986218214035, 'timestamp': '2025-09-30 22:35:56.423000', 'step': 19170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.455666', 'step': 19170, 'epoch': 3} {'type': 'loss', 'content': 0.10535818338394165, 'timestamp': '2025-09-30 22:35:56.459346', 'step': 19171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:56.496704', 'step': 19171, 'epoch': 3} {'type': 'loss', 'content': 0.05527368560433388, 'timestamp': '2025-09-30 22:35:56.522002', 'step': 19172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:56.560598', 'step': 19172, 'epoch': 3} {'type': 'loss', 'content': 0.041376665234565735, 'timestamp': '2025-09-30 22:35:56.567597', 'step': 19173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:56.599333', 'step': 19173, 'epoch': 3} {'type': 'loss', 'content': 0.11879166215658188, 'timestamp': '2025-09-30 22:35:56.603899', 'step': 19174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:56.636990', 'step': 19174, 'epoch': 3} {'type': 'loss', 'content': 0.09628014266490936, 'timestamp': '2025-09-30 22:35:56.640053', 'step': 19175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:35:56.674374', 'step': 19175, 'epoch': 3} {'type': 'loss', 'content': 0.043873872607946396, 'timestamp': '2025-09-30 22:35:56.700263', 'step': 19176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:56.750226', 'step': 19176, 'epoch': 3} {'type': 'loss', 'content': 0.09750530868768692, 'timestamp': '2025-09-30 22:35:56.757113', 'step': 19177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.789954', 'step': 19177, 'epoch': 3} {'type': 'loss', 'content': 0.17605869472026825, 'timestamp': '2025-09-30 22:35:56.798349', 'step': 19178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:56.831750', 'step': 19178, 'epoch': 3} {'type': 'loss', 'content': 0.054262060672044754, 'timestamp': '2025-09-30 22:35:56.835731', 'step': 19179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:56.867128', 'step': 19179, 'epoch': 3} {'type': 'loss', 'content': 0.06044822558760643, 'timestamp': '2025-09-30 22:35:56.893178', 'step': 19180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:56.927609', 'step': 19180, 'epoch': 3} {'type': 'loss', 'content': 0.10192814469337463, 'timestamp': '2025-09-30 22:35:56.935822', 'step': 19181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:56.976830', 'step': 19181, 'epoch': 3} {'type': 'loss', 'content': 0.05608993023633957, 'timestamp': '2025-09-30 22:35:56.979410', 'step': 19182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:57.010310', 'step': 19182, 'epoch': 3} {'type': 'loss', 'content': 0.06056205928325653, 'timestamp': '2025-09-30 22:35:57.014345', 'step': 19183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.045329', 'step': 19183, 'epoch': 3} {'type': 'loss', 'content': 0.0183918084949255, 'timestamp': '2025-09-30 22:35:57.071732', 'step': 19184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.107875', 'step': 19184, 'epoch': 3} {'type': 'loss', 'content': 0.03579416126012802, 'timestamp': '2025-09-30 22:35:57.110980', 'step': 19185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:57.142898', 'step': 19185, 'epoch': 3} {'type': 'loss', 'content': 0.10339947044849396, 'timestamp': '2025-09-30 22:35:57.148665', 'step': 19186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:57.187171', 'step': 19186, 'epoch': 3} {'type': 'loss', 'content': 0.05131373554468155, 'timestamp': '2025-09-30 22:35:57.191838', 'step': 19187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:57.223317', 'step': 19187, 'epoch': 3} {'type': 'loss', 'content': 0.0926637277007103, 'timestamp': '2025-09-30 22:35:57.248134', 'step': 19188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:57.279220', 'step': 19188, 'epoch': 3} {'type': 'loss', 'content': 0.1112101748585701, 'timestamp': '2025-09-30 22:35:57.286240', 'step': 19189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.319194', 'step': 19189, 'epoch': 3} {'type': 'loss', 'content': 0.04316534474492073, 'timestamp': '2025-09-30 22:35:57.321985', 'step': 19190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:57.353556', 'step': 19190, 'epoch': 3} {'type': 'loss', 'content': 0.04386692866683006, 'timestamp': '2025-09-30 22:35:57.358974', 'step': 19191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.401292', 'step': 19191, 'epoch': 3} {'type': 'loss', 'content': 0.09091988950967789, 'timestamp': '2025-09-30 22:35:57.425833', 'step': 19192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:57.457320', 'step': 19192, 'epoch': 3} {'type': 'loss', 'content': 0.08770051598548889, 'timestamp': '2025-09-30 22:35:57.465012', 'step': 19193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.497263', 'step': 19193, 'epoch': 3} {'type': 'loss', 'content': 0.10012351721525192, 'timestamp': '2025-09-30 22:35:57.507625', 'step': 19194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:57.540627', 'step': 19194, 'epoch': 3} {'type': 'loss', 'content': 0.09149030596017838, 'timestamp': '2025-09-30 22:35:57.544277', 'step': 19195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.579403', 'step': 19195, 'epoch': 3} {'type': 'loss', 'content': 0.026869557797908783, 'timestamp': '2025-09-30 22:35:57.607256', 'step': 19196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:57.646868', 'step': 19196, 'epoch': 3} {'type': 'loss', 'content': 0.05126796290278435, 'timestamp': '2025-09-30 22:35:57.649230', 'step': 19197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:57.690466', 'step': 19197, 'epoch': 3} {'type': 'loss', 'content': 0.019467294216156006, 'timestamp': '2025-09-30 22:35:57.705510', 'step': 19198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:57.743692', 'step': 19198, 'epoch': 3} {'type': 'loss', 'content': 0.05841780826449394, 'timestamp': '2025-09-30 22:35:57.756580', 'step': 19199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.814607', 'step': 19199, 'epoch': 3} {'type': 'loss', 'content': 0.12354498356580734, 'timestamp': '2025-09-30 22:35:57.856248', 'step': 19200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:57.906911', 'step': 19200, 'epoch': 3} {'type': 'loss', 'content': 0.07448145002126694, 'timestamp': '2025-09-30 22:35:57.919404', 'step': 19201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:57.967734', 'step': 19201, 'epoch': 3} {'type': 'loss', 'content': 0.01774824596941471, 'timestamp': '2025-09-30 22:35:57.978403', 'step': 19202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:58.028500', 'step': 19202, 'epoch': 3} {'type': 'loss', 'content': 0.07410302013158798, 'timestamp': '2025-09-30 22:35:58.046048', 'step': 19203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:58.086650', 'step': 19203, 'epoch': 3} {'type': 'loss', 'content': 0.06606826931238174, 'timestamp': '2025-09-30 22:35:58.122604', 'step': 19204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:58.174336', 'step': 19204, 'epoch': 3} {'type': 'loss', 'content': 0.07616512477397919, 'timestamp': '2025-09-30 22:35:58.185091', 'step': 19205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:58.232311', 'step': 19205, 'epoch': 3} {'type': 'loss', 'content': 0.04446960613131523, 'timestamp': '2025-09-30 22:35:58.245437', 'step': 19206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:58.290399', 'step': 19206, 'epoch': 3} {'type': 'loss', 'content': 0.13270318508148193, 'timestamp': '2025-09-30 22:35:58.306433', 'step': 19207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:58.347861', 'step': 19207, 'epoch': 3} {'type': 'loss', 'content': 0.06397182494401932, 'timestamp': '2025-09-30 22:35:58.390441', 'step': 19208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:58.435598', 'step': 19208, 'epoch': 3} {'type': 'loss', 'content': 0.05754735693335533, 'timestamp': '2025-09-30 22:35:58.441983', 'step': 19209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:58.486509', 'step': 19209, 'epoch': 3} {'type': 'loss', 'content': 0.15975864231586456, 'timestamp': '2025-09-30 22:35:58.497629', 'step': 19210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:58.547258', 'step': 19210, 'epoch': 3} {'type': 'loss', 'content': 0.056948188692331314, 'timestamp': '2025-09-30 22:35:58.558491', 'step': 19211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:58.606542', 'step': 19211, 'epoch': 3} {'type': 'loss', 'content': 0.055672988295555115, 'timestamp': '2025-09-30 22:35:58.641522', 'step': 19212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:58.682392', 'step': 19212, 'epoch': 3} {'type': 'loss', 'content': 0.16467398405075073, 'timestamp': '2025-09-30 22:35:58.704760', 'step': 19213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:58.741719', 'step': 19213, 'epoch': 3} {'type': 'loss', 'content': 0.04411527141928673, 'timestamp': '2025-09-30 22:35:58.750429', 'step': 19214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:58.813589', 'step': 19214, 'epoch': 3} {'type': 'loss', 'content': 0.09205541759729385, 'timestamp': '2025-09-30 22:35:58.826887', 'step': 19215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:58.870301', 'step': 19215, 'epoch': 3} {'type': 'loss', 'content': 0.14271321892738342, 'timestamp': '2025-09-30 22:35:58.912617', 'step': 19216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:58.948335', 'step': 19216, 'epoch': 3} {'type': 'loss', 'content': 0.09464113414287567, 'timestamp': '2025-09-30 22:35:58.958073', 'step': 19217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:35:59.006656', 'step': 19217, 'epoch': 3} {'type': 'loss', 'content': 0.03691370412707329, 'timestamp': '2025-09-30 22:35:59.021021', 'step': 19218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.057280', 'step': 19218, 'epoch': 3} {'type': 'loss', 'content': 0.13444897532463074, 'timestamp': '2025-09-30 22:35:59.067355', 'step': 19219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.103561', 'step': 19219, 'epoch': 3} {'type': 'loss', 'content': 0.12480475753545761, 'timestamp': '2025-09-30 22:35:59.141346', 'step': 19220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:35:59.175448', 'step': 19220, 'epoch': 3} {'type': 'loss', 'content': 0.09875942021608353, 'timestamp': '2025-09-30 22:35:59.196116', 'step': 19221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:35:59.260938', 'step': 19221, 'epoch': 3} {'type': 'loss', 'content': 0.045128192752599716, 'timestamp': '2025-09-30 22:35:59.286123', 'step': 19222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.327592', 'step': 19222, 'epoch': 3} {'type': 'loss', 'content': 0.07634450495243073, 'timestamp': '2025-09-30 22:35:59.342297', 'step': 19223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.388188', 'step': 19223, 'epoch': 3} {'type': 'loss', 'content': 0.0650428831577301, 'timestamp': '2025-09-30 22:35:59.415037', 'step': 19224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.447110', 'step': 19224, 'epoch': 3} {'type': 'loss', 'content': 0.0450369156897068, 'timestamp': '2025-09-30 22:35:59.458417', 'step': 19225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:59.504064', 'step': 19225, 'epoch': 3} {'type': 'loss', 'content': 0.03278427571058273, 'timestamp': '2025-09-30 22:35:59.508511', 'step': 19226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:59.541032', 'step': 19226, 'epoch': 3} {'type': 'loss', 'content': 0.062096841633319855, 'timestamp': '2025-09-30 22:35:59.543927', 'step': 19227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:35:59.576682', 'step': 19227, 'epoch': 3} {'type': 'loss', 'content': 0.13896125555038452, 'timestamp': '2025-09-30 22:35:59.605232', 'step': 19228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.636559', 'step': 19228, 'epoch': 3} {'type': 'loss', 'content': 0.12388557940721512, 'timestamp': '2025-09-30 22:35:59.650125', 'step': 19229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:59.682941', 'step': 19229, 'epoch': 3} {'type': 'loss', 'content': 0.1026437059044838, 'timestamp': '2025-09-30 22:35:59.686963', 'step': 19230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:59.725374', 'step': 19230, 'epoch': 3} {'type': 'loss', 'content': 0.014024285599589348, 'timestamp': '2025-09-30 22:35:59.734786', 'step': 19231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.765604', 'step': 19231, 'epoch': 3} {'type': 'loss', 'content': 0.08060763776302338, 'timestamp': '2025-09-30 22:35:59.796763', 'step': 19232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.836200', 'step': 19232, 'epoch': 3} {'type': 'loss', 'content': 0.0762099176645279, 'timestamp': '2025-09-30 22:35:59.849119', 'step': 19233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:35:59.883600', 'step': 19233, 'epoch': 3} {'type': 'loss', 'content': 0.08853115886449814, 'timestamp': '2025-09-30 22:35:59.888444', 'step': 19234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:35:59.920510', 'step': 19234, 'epoch': 3} {'type': 'loss', 'content': 0.07571247965097427, 'timestamp': '2025-09-30 22:35:59.926216', 'step': 19235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:35:59.958273', 'step': 19235, 'epoch': 3} {'type': 'loss', 'content': 0.06451166421175003, 'timestamp': '2025-09-30 22:35:59.983632', 'step': 19236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:00.017118', 'step': 19236, 'epoch': 3} {'type': 'loss', 'content': 0.14995864033699036, 'timestamp': '2025-09-30 22:36:00.020570', 'step': 19237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:00.051542', 'step': 19237, 'epoch': 3} {'type': 'loss', 'content': 0.050173692405223846, 'timestamp': '2025-09-30 22:36:00.063020', 'step': 19238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.094186', 'step': 19238, 'epoch': 3} {'type': 'loss', 'content': 0.13984408974647522, 'timestamp': '2025-09-30 22:36:00.100051', 'step': 19239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.133083', 'step': 19239, 'epoch': 3} {'type': 'loss', 'content': 0.07744426280260086, 'timestamp': '2025-09-30 22:36:00.157410', 'step': 19240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.191458', 'step': 19240, 'epoch': 3} {'type': 'loss', 'content': 0.054421618580818176, 'timestamp': '2025-09-30 22:36:00.201800', 'step': 19241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.234425', 'step': 19241, 'epoch': 3} {'type': 'loss', 'content': 0.08967817574739456, 'timestamp': '2025-09-30 22:36:00.238423', 'step': 19242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.269306', 'step': 19242, 'epoch': 3} {'type': 'loss', 'content': 0.10968644171953201, 'timestamp': '2025-09-30 22:36:00.281937', 'step': 19243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:00.319806', 'step': 19243, 'epoch': 3} {'type': 'loss', 'content': 0.073454350233078, 'timestamp': '2025-09-30 22:36:00.353167', 'step': 19244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:00.384550', 'step': 19244, 'epoch': 3} {'type': 'loss', 'content': 0.08907449245452881, 'timestamp': '2025-09-30 22:36:00.389408', 'step': 19245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.430268', 'step': 19245, 'epoch': 3} {'type': 'loss', 'content': 0.08753317594528198, 'timestamp': '2025-09-30 22:36:00.433732', 'step': 19246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.473346', 'step': 19246, 'epoch': 3} {'type': 'loss', 'content': 0.0713885948061943, 'timestamp': '2025-09-30 22:36:00.476222', 'step': 19247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.514746', 'step': 19247, 'epoch': 3} {'type': 'loss', 'content': 0.055390242487192154, 'timestamp': '2025-09-30 22:36:00.539840', 'step': 19248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.571414', 'step': 19248, 'epoch': 3} {'type': 'loss', 'content': 0.11503929644823074, 'timestamp': '2025-09-30 22:36:00.577393', 'step': 19249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:00.612319', 'step': 19249, 'epoch': 3} {'type': 'loss', 'content': 0.09747882187366486, 'timestamp': '2025-09-30 22:36:00.616000', 'step': 19250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.651668', 'step': 19250, 'epoch': 3} {'type': 'loss', 'content': 0.12585197389125824, 'timestamp': '2025-09-30 22:36:00.664416', 'step': 19251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.694939', 'step': 19251, 'epoch': 3} {'type': 'loss', 'content': 0.08409475535154343, 'timestamp': '2025-09-30 22:36:00.726734', 'step': 19252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.766979', 'step': 19252, 'epoch': 3} {'type': 'loss', 'content': 0.08964379876852036, 'timestamp': '2025-09-30 22:36:00.776958', 'step': 19253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.816796', 'step': 19253, 'epoch': 3} {'type': 'loss', 'content': 0.05000413581728935, 'timestamp': '2025-09-30 22:36:00.820710', 'step': 19254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.852480', 'step': 19254, 'epoch': 3} {'type': 'loss', 'content': 0.06939823180437088, 'timestamp': '2025-09-30 22:36:00.856044', 'step': 19255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:00.886616', 'step': 19255, 'epoch': 3} {'type': 'loss', 'content': 0.11426937580108643, 'timestamp': '2025-09-30 22:36:00.913476', 'step': 19256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:00.953153', 'step': 19256, 'epoch': 3} {'type': 'loss', 'content': 0.08629238605499268, 'timestamp': '2025-09-30 22:36:00.957731', 'step': 19257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:00.989930', 'step': 19257, 'epoch': 3} {'type': 'loss', 'content': 0.048192139714956284, 'timestamp': '2025-09-30 22:36:00.993680', 'step': 19258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.037450', 'step': 19258, 'epoch': 3} {'type': 'loss', 'content': 0.09883593767881393, 'timestamp': '2025-09-30 22:36:01.041373', 'step': 19259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.075025', 'step': 19259, 'epoch': 3} {'type': 'loss', 'content': 0.0797560065984726, 'timestamp': '2025-09-30 22:36:01.109067', 'step': 19260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:01.157099', 'step': 19260, 'epoch': 3} {'type': 'loss', 'content': 0.09449447691440582, 'timestamp': '2025-09-30 22:36:01.164451', 'step': 19261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:01.204912', 'step': 19261, 'epoch': 3} {'type': 'loss', 'content': 0.05296458676457405, 'timestamp': '2025-09-30 22:36:01.208352', 'step': 19262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.247344', 'step': 19262, 'epoch': 3} {'type': 'loss', 'content': 0.057845380157232285, 'timestamp': '2025-09-30 22:36:01.261667', 'step': 19263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:01.306042', 'step': 19263, 'epoch': 3} {'type': 'loss', 'content': 0.08483053743839264, 'timestamp': '2025-09-30 22:36:01.339416', 'step': 19264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.385662', 'step': 19264, 'epoch': 3} {'type': 'loss', 'content': 0.06579270213842392, 'timestamp': '2025-09-30 22:36:01.391623', 'step': 19265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:01.426857', 'step': 19265, 'epoch': 3} {'type': 'loss', 'content': 0.13248780369758606, 'timestamp': '2025-09-30 22:36:01.436828', 'step': 19266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:01.478501', 'step': 19266, 'epoch': 3} {'type': 'loss', 'content': 0.1289948672056198, 'timestamp': '2025-09-30 22:36:01.483864', 'step': 19267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:01.515782', 'step': 19267, 'epoch': 3} {'type': 'loss', 'content': 0.06990434229373932, 'timestamp': '2025-09-30 22:36:01.540686', 'step': 19268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:01.570796', 'step': 19268, 'epoch': 3} {'type': 'loss', 'content': 0.05896270275115967, 'timestamp': '2025-09-30 22:36:01.576154', 'step': 19269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.609746', 'step': 19269, 'epoch': 3} {'type': 'loss', 'content': 0.13919441401958466, 'timestamp': '2025-09-30 22:36:01.613257', 'step': 19270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:01.654750', 'step': 19270, 'epoch': 3} {'type': 'loss', 'content': 0.07486053556203842, 'timestamp': '2025-09-30 22:36:01.659518', 'step': 19271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:01.700044', 'step': 19271, 'epoch': 3} {'type': 'loss', 'content': 0.10450758039951324, 'timestamp': '2025-09-30 22:36:01.727603', 'step': 19272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:01.760313', 'step': 19272, 'epoch': 3} {'type': 'loss', 'content': 0.07443314790725708, 'timestamp': '2025-09-30 22:36:01.766465', 'step': 19273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.807803', 'step': 19273, 'epoch': 3} {'type': 'loss', 'content': 0.07352324575185776, 'timestamp': '2025-09-30 22:36:01.816047', 'step': 19274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:01.849743', 'step': 19274, 'epoch': 3} {'type': 'loss', 'content': 0.09731148928403854, 'timestamp': '2025-09-30 22:36:01.854898', 'step': 19275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:01.888693', 'step': 19275, 'epoch': 3} {'type': 'loss', 'content': 0.032288357615470886, 'timestamp': '2025-09-30 22:36:01.915594', 'step': 19276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.957786', 'step': 19276, 'epoch': 3} {'type': 'loss', 'content': 0.08599276095628738, 'timestamp': '2025-09-30 22:36:01.961608', 'step': 19277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:01.994158', 'step': 19277, 'epoch': 3} {'type': 'loss', 'content': 0.08933723717927933, 'timestamp': '2025-09-30 22:36:01.997495', 'step': 19278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.029868', 'step': 19278, 'epoch': 3} {'type': 'loss', 'content': 0.11443289369344711, 'timestamp': '2025-09-30 22:36:02.037107', 'step': 19279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:02.076684', 'step': 19279, 'epoch': 3} {'type': 'loss', 'content': 0.01728932000696659, 'timestamp': '2025-09-30 22:36:02.109924', 'step': 19280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:02.143093', 'step': 19280, 'epoch': 3} {'type': 'loss', 'content': 0.04520050436258316, 'timestamp': '2025-09-30 22:36:02.146448', 'step': 19281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:02.184194', 'step': 19281, 'epoch': 3} {'type': 'loss', 'content': 0.0708334669470787, 'timestamp': '2025-09-30 22:36:02.190668', 'step': 19282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:02.226395', 'step': 19282, 'epoch': 3} {'type': 'loss', 'content': 0.06003275886178017, 'timestamp': '2025-09-30 22:36:02.230885', 'step': 19283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:02.268051', 'step': 19283, 'epoch': 3} {'type': 'loss', 'content': 0.07238456606864929, 'timestamp': '2025-09-30 22:36:02.296509', 'step': 19284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:02.336859', 'step': 19284, 'epoch': 3} {'type': 'loss', 'content': 0.11152161657810211, 'timestamp': '2025-09-30 22:36:02.339878', 'step': 19285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.379393', 'step': 19285, 'epoch': 3} {'type': 'loss', 'content': 0.10570769011974335, 'timestamp': '2025-09-30 22:36:02.383343', 'step': 19286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.422510', 'step': 19286, 'epoch': 3} {'type': 'loss', 'content': 0.046065542846918106, 'timestamp': '2025-09-30 22:36:02.427616', 'step': 19287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:02.464296', 'step': 19287, 'epoch': 3} {'type': 'loss', 'content': 0.1293356567621231, 'timestamp': '2025-09-30 22:36:02.500159', 'step': 19288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.534227', 'step': 19288, 'epoch': 3} {'type': 'loss', 'content': 0.053984131664037704, 'timestamp': '2025-09-30 22:36:02.538897', 'step': 19289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:02.574287', 'step': 19289, 'epoch': 3} {'type': 'loss', 'content': 0.14588245749473572, 'timestamp': '2025-09-30 22:36:02.577802', 'step': 19290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.618642', 'step': 19290, 'epoch': 3} {'type': 'loss', 'content': 0.08525554090738297, 'timestamp': '2025-09-30 22:36:02.623364', 'step': 19291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:02.658394', 'step': 19291, 'epoch': 3} {'type': 'loss', 'content': 0.11437021940946579, 'timestamp': '2025-09-30 22:36:02.690019', 'step': 19292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.723066', 'step': 19292, 'epoch': 3} {'type': 'loss', 'content': 0.07273010164499283, 'timestamp': '2025-09-30 22:36:02.731680', 'step': 19293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:02.773221', 'step': 19293, 'epoch': 3} {'type': 'loss', 'content': 0.10532878339290619, 'timestamp': '2025-09-30 22:36:02.777955', 'step': 19294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:02.812998', 'step': 19294, 'epoch': 3} {'type': 'loss', 'content': 0.09948436170816422, 'timestamp': '2025-09-30 22:36:02.826106', 'step': 19295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:02.868529', 'step': 19295, 'epoch': 3} {'type': 'loss', 'content': 0.02422376349568367, 'timestamp': '2025-09-30 22:36:02.893908', 'step': 19296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:02.926236', 'step': 19296, 'epoch': 3} {'type': 'loss', 'content': 0.021730078384280205, 'timestamp': '2025-09-30 22:36:02.936864', 'step': 19297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:02.968257', 'step': 19297, 'epoch': 3} {'type': 'loss', 'content': 0.06977465748786926, 'timestamp': '2025-09-30 22:36:02.972760', 'step': 19298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:03.009643', 'step': 19298, 'epoch': 3} {'type': 'loss', 'content': 0.09053696691989899, 'timestamp': '2025-09-30 22:36:03.014333', 'step': 19299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.046330', 'step': 19299, 'epoch': 3} {'type': 'loss', 'content': 0.06501400470733643, 'timestamp': '2025-09-30 22:36:03.072463', 'step': 19300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:03.106258', 'step': 19300, 'epoch': 3} {'type': 'loss', 'content': 0.06811656057834625, 'timestamp': '2025-09-30 22:36:03.110155', 'step': 19301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.155980', 'step': 19301, 'epoch': 3} {'type': 'loss', 'content': 0.11107978224754333, 'timestamp': '2025-09-30 22:36:03.161464', 'step': 19302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.194775', 'step': 19302, 'epoch': 3} {'type': 'loss', 'content': 0.06862517446279526, 'timestamp': '2025-09-30 22:36:03.200240', 'step': 19303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.232695', 'step': 19303, 'epoch': 3} {'type': 'loss', 'content': 0.034538451582193375, 'timestamp': '2025-09-30 22:36:03.258988', 'step': 19304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.294689', 'step': 19304, 'epoch': 3} {'type': 'loss', 'content': 0.10976191610097885, 'timestamp': '2025-09-30 22:36:03.299245', 'step': 19305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.338822', 'step': 19305, 'epoch': 3} {'type': 'loss', 'content': 0.07295835018157959, 'timestamp': '2025-09-30 22:36:03.343525', 'step': 19306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.378077', 'step': 19306, 'epoch': 3} {'type': 'loss', 'content': 0.05414901673793793, 'timestamp': '2025-09-30 22:36:03.382037', 'step': 19307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.422721', 'step': 19307, 'epoch': 3} {'type': 'loss', 'content': 0.009763072244822979, 'timestamp': '2025-09-30 22:36:03.447979', 'step': 19308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.481884', 'step': 19308, 'epoch': 3} {'type': 'loss', 'content': 0.1015748381614685, 'timestamp': '2025-09-30 22:36:03.486607', 'step': 19309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.519371', 'step': 19309, 'epoch': 3} {'type': 'loss', 'content': 0.03681222349405289, 'timestamp': '2025-09-30 22:36:03.523468', 'step': 19310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:03.557096', 'step': 19310, 'epoch': 3} {'type': 'loss', 'content': 0.06755488365888596, 'timestamp': '2025-09-30 22:36:03.568785', 'step': 19311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:03.600463', 'step': 19311, 'epoch': 3} {'type': 'loss', 'content': 0.09559322148561478, 'timestamp': '2025-09-30 22:36:03.625391', 'step': 19312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.657789', 'step': 19312, 'epoch': 3} {'type': 'loss', 'content': 0.023911431431770325, 'timestamp': '2025-09-30 22:36:03.664049', 'step': 19313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.696260', 'step': 19313, 'epoch': 3} {'type': 'loss', 'content': 0.11466750502586365, 'timestamp': '2025-09-30 22:36:03.699914', 'step': 19314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.733468', 'step': 19314, 'epoch': 3} {'type': 'loss', 'content': 0.07679428160190582, 'timestamp': '2025-09-30 22:36:03.744493', 'step': 19315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.802380', 'step': 19315, 'epoch': 3} {'type': 'loss', 'content': 0.13344915211200714, 'timestamp': '2025-09-30 22:36:03.829701', 'step': 19316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:03.861435', 'step': 19316, 'epoch': 3} {'type': 'loss', 'content': 0.08728985488414764, 'timestamp': '2025-09-30 22:36:03.873829', 'step': 19317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.905540', 'step': 19317, 'epoch': 3} {'type': 'loss', 'content': 0.08825937658548355, 'timestamp': '2025-09-30 22:36:03.909119', 'step': 19318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:03.951819', 'step': 19318, 'epoch': 3} {'type': 'loss', 'content': 0.06434638053178787, 'timestamp': '2025-09-30 22:36:03.957144', 'step': 19319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:03.992746', 'step': 19319, 'epoch': 3} {'type': 'loss', 'content': 0.08644832670688629, 'timestamp': '2025-09-30 22:36:04.031070', 'step': 19320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.063169', 'step': 19320, 'epoch': 3} {'type': 'loss', 'content': 0.09456492215394974, 'timestamp': '2025-09-30 22:36:04.074670', 'step': 19321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:04.104976', 'step': 19321, 'epoch': 3} {'type': 'loss', 'content': 0.15010222792625427, 'timestamp': '2025-09-30 22:36:04.121619', 'step': 19322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:04.153840', 'step': 19322, 'epoch': 3} {'type': 'loss', 'content': 0.05588514730334282, 'timestamp': '2025-09-30 22:36:04.157401', 'step': 19323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.189360', 'step': 19323, 'epoch': 3} {'type': 'loss', 'content': 0.049500416964292526, 'timestamp': '2025-09-30 22:36:04.216730', 'step': 19324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:04.248559', 'step': 19324, 'epoch': 3} {'type': 'loss', 'content': 0.014147465117275715, 'timestamp': '2025-09-30 22:36:04.251253', 'step': 19325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:04.282421', 'step': 19325, 'epoch': 3} {'type': 'loss', 'content': 0.0915861651301384, 'timestamp': '2025-09-30 22:36:04.285229', 'step': 19326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.317348', 'step': 19326, 'epoch': 3} {'type': 'loss', 'content': 0.04840649664402008, 'timestamp': '2025-09-30 22:36:04.330918', 'step': 19327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:04.362351', 'step': 19327, 'epoch': 3} {'type': 'loss', 'content': 0.09021665900945663, 'timestamp': '2025-09-30 22:36:04.398821', 'step': 19328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:04.438321', 'step': 19328, 'epoch': 3} {'type': 'loss', 'content': 0.05356767401099205, 'timestamp': '2025-09-30 22:36:04.442104', 'step': 19329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:04.474201', 'step': 19329, 'epoch': 3} {'type': 'loss', 'content': 0.05710167437791824, 'timestamp': '2025-09-30 22:36:04.477487', 'step': 19330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:04.509176', 'step': 19330, 'epoch': 3} {'type': 'loss', 'content': 0.10637257993221283, 'timestamp': '2025-09-30 22:36:04.513242', 'step': 19331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:04.544349', 'step': 19331, 'epoch': 3} {'type': 'loss', 'content': 0.054475340992212296, 'timestamp': '2025-09-30 22:36:04.570033', 'step': 19332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:04.607267', 'step': 19332, 'epoch': 3} {'type': 'loss', 'content': 0.1083291620016098, 'timestamp': '2025-09-30 22:36:04.610958', 'step': 19333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.642839', 'step': 19333, 'epoch': 3} {'type': 'loss', 'content': 0.0813196673989296, 'timestamp': '2025-09-30 22:36:04.654512', 'step': 19334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:04.687615', 'step': 19334, 'epoch': 3} {'type': 'loss', 'content': 0.01806696318089962, 'timestamp': '2025-09-30 22:36:04.691139', 'step': 19335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.724014', 'step': 19335, 'epoch': 3} {'type': 'loss', 'content': 0.06266982108354568, 'timestamp': '2025-09-30 22:36:04.750476', 'step': 19336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.784444', 'step': 19336, 'epoch': 3} {'type': 'loss', 'content': 0.05063693970441818, 'timestamp': '2025-09-30 22:36:04.792456', 'step': 19337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.831671', 'step': 19337, 'epoch': 3} {'type': 'loss', 'content': 0.0847671627998352, 'timestamp': '2025-09-30 22:36:04.836026', 'step': 19338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:04.867169', 'step': 19338, 'epoch': 3} {'type': 'loss', 'content': 0.11544033885002136, 'timestamp': '2025-09-30 22:36:04.871243', 'step': 19339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.902575', 'step': 19339, 'epoch': 3} {'type': 'loss', 'content': 0.0985432043671608, 'timestamp': '2025-09-30 22:36:04.927890', 'step': 19340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:04.958337', 'step': 19340, 'epoch': 3} {'type': 'loss', 'content': 0.13655462861061096, 'timestamp': '2025-09-30 22:36:04.966599', 'step': 19341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:04.998771', 'step': 19341, 'epoch': 3} {'type': 'loss', 'content': 0.08167755603790283, 'timestamp': '2025-09-30 22:36:05.003686', 'step': 19342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.035095', 'step': 19342, 'epoch': 3} {'type': 'loss', 'content': 0.10512249171733856, 'timestamp': '2025-09-30 22:36:05.038581', 'step': 19343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:05.072040', 'step': 19343, 'epoch': 3} {'type': 'loss', 'content': 0.07344475388526917, 'timestamp': '2025-09-30 22:36:05.098230', 'step': 19344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.130824', 'step': 19344, 'epoch': 3} {'type': 'loss', 'content': 0.04424357786774635, 'timestamp': '2025-09-30 22:36:05.136805', 'step': 19345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.169479', 'step': 19345, 'epoch': 3} {'type': 'loss', 'content': 0.08668582886457443, 'timestamp': '2025-09-30 22:36:05.173443', 'step': 19346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:05.206062', 'step': 19346, 'epoch': 3} {'type': 'loss', 'content': 0.047509919852018356, 'timestamp': '2025-09-30 22:36:05.219798', 'step': 19347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.254187', 'step': 19347, 'epoch': 3} {'type': 'loss', 'content': 0.03528350964188576, 'timestamp': '2025-09-30 22:36:05.292444', 'step': 19348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.324831', 'step': 19348, 'epoch': 3} {'type': 'loss', 'content': 0.08379576355218887, 'timestamp': '2025-09-30 22:36:05.336546', 'step': 19349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.369276', 'step': 19349, 'epoch': 3} {'type': 'loss', 'content': 0.06710901111364365, 'timestamp': '2025-09-30 22:36:05.374152', 'step': 19350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:05.410691', 'step': 19350, 'epoch': 3} {'type': 'loss', 'content': 0.057313304394483566, 'timestamp': '2025-09-30 22:36:05.413604', 'step': 19351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.445958', 'step': 19351, 'epoch': 3} {'type': 'loss', 'content': 0.09744538366794586, 'timestamp': '2025-09-30 22:36:05.472427', 'step': 19352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.503827', 'step': 19352, 'epoch': 3} {'type': 'loss', 'content': 0.06003807485103607, 'timestamp': '2025-09-30 22:36:05.507825', 'step': 19353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.543821', 'step': 19353, 'epoch': 3} {'type': 'loss', 'content': 0.0375041738152504, 'timestamp': '2025-09-30 22:36:05.549544', 'step': 19354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.581709', 'step': 19354, 'epoch': 3} {'type': 'loss', 'content': 0.1527503877878189, 'timestamp': '2025-09-30 22:36:05.585518', 'step': 19355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.616776', 'step': 19355, 'epoch': 3} {'type': 'loss', 'content': 0.04980069771409035, 'timestamp': '2025-09-30 22:36:05.643482', 'step': 19356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.674760', 'step': 19356, 'epoch': 3} {'type': 'loss', 'content': 0.1256227344274521, 'timestamp': '2025-09-30 22:36:05.678456', 'step': 19357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:05.710219', 'step': 19357, 'epoch': 3} {'type': 'loss', 'content': 0.059001948684453964, 'timestamp': '2025-09-30 22:36:05.714666', 'step': 19358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:05.746312', 'step': 19358, 'epoch': 3} {'type': 'loss', 'content': 0.03693797066807747, 'timestamp': '2025-09-30 22:36:05.751519', 'step': 19359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.783511', 'step': 19359, 'epoch': 3} {'type': 'loss', 'content': 0.04008409380912781, 'timestamp': '2025-09-30 22:36:05.808891', 'step': 19360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:05.840327', 'step': 19360, 'epoch': 3} {'type': 'loss', 'content': 0.09348945319652557, 'timestamp': '2025-09-30 22:36:05.849875', 'step': 19361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.896388', 'step': 19361, 'epoch': 3} {'type': 'loss', 'content': 0.11222122609615326, 'timestamp': '2025-09-30 22:36:05.899892', 'step': 19362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.936758', 'step': 19362, 'epoch': 3} {'type': 'loss', 'content': 0.07246068120002747, 'timestamp': '2025-09-30 22:36:05.945816', 'step': 19363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:05.977277', 'step': 19363, 'epoch': 3} {'type': 'loss', 'content': 0.103940948843956, 'timestamp': '2025-09-30 22:36:06.003391', 'step': 19364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:06.036202', 'step': 19364, 'epoch': 3} {'type': 'loss', 'content': 0.08117636293172836, 'timestamp': '2025-09-30 22:36:06.047978', 'step': 19365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.080139', 'step': 19365, 'epoch': 3} {'type': 'loss', 'content': 0.07354360073804855, 'timestamp': '2025-09-30 22:36:06.085350', 'step': 19366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.117233', 'step': 19366, 'epoch': 3} {'type': 'loss', 'content': 0.06267144531011581, 'timestamp': '2025-09-30 22:36:06.128448', 'step': 19367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:06.165992', 'step': 19367, 'epoch': 3} {'type': 'loss', 'content': 0.09323661029338837, 'timestamp': '2025-09-30 22:36:06.191397', 'step': 19368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:06.222547', 'step': 19368, 'epoch': 3} {'type': 'loss', 'content': 0.06312741339206696, 'timestamp': '2025-09-30 22:36:06.234982', 'step': 19369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:06.271849', 'step': 19369, 'epoch': 3} {'type': 'loss', 'content': 0.09007567167282104, 'timestamp': '2025-09-30 22:36:06.276894', 'step': 19370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:06.310477', 'step': 19370, 'epoch': 3} {'type': 'loss', 'content': 0.0646507740020752, 'timestamp': '2025-09-30 22:36:06.313264', 'step': 19371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:06.349224', 'step': 19371, 'epoch': 3} {'type': 'loss', 'content': 0.03706015273928642, 'timestamp': '2025-09-30 22:36:06.374777', 'step': 19372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.407506', 'step': 19372, 'epoch': 3} {'type': 'loss', 'content': 0.05553589388728142, 'timestamp': '2025-09-30 22:36:06.411976', 'step': 19373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.451277', 'step': 19373, 'epoch': 3} {'type': 'loss', 'content': 0.05750646814703941, 'timestamp': '2025-09-30 22:36:06.465281', 'step': 19374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:06.498111', 'step': 19374, 'epoch': 3} {'type': 'loss', 'content': 0.11987220495939255, 'timestamp': '2025-09-30 22:36:06.503233', 'step': 19375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.538567', 'step': 19375, 'epoch': 3} {'type': 'loss', 'content': 0.07030300796031952, 'timestamp': '2025-09-30 22:36:06.563071', 'step': 19376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.599683', 'step': 19376, 'epoch': 3} {'type': 'loss', 'content': 0.05786396935582161, 'timestamp': '2025-09-30 22:36:06.613398', 'step': 19377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:06.650942', 'step': 19377, 'epoch': 3} {'type': 'loss', 'content': 0.07010769098997116, 'timestamp': '2025-09-30 22:36:06.654272', 'step': 19378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:06.686220', 'step': 19378, 'epoch': 3} {'type': 'loss', 'content': 0.03431764617562294, 'timestamp': '2025-09-30 22:36:06.690513', 'step': 19379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.724171', 'step': 19379, 'epoch': 3} {'type': 'loss', 'content': 0.12258315831422806, 'timestamp': '2025-09-30 22:36:06.749871', 'step': 19380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.779799', 'step': 19380, 'epoch': 3} {'type': 'loss', 'content': 0.1135043129324913, 'timestamp': '2025-09-30 22:36:06.785335', 'step': 19381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.826648', 'step': 19381, 'epoch': 3} {'type': 'loss', 'content': 0.07890400290489197, 'timestamp': '2025-09-30 22:36:06.831902', 'step': 19382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.861917', 'step': 19382, 'epoch': 3} {'type': 'loss', 'content': 0.08216952532529831, 'timestamp': '2025-09-30 22:36:06.871871', 'step': 19383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.913135', 'step': 19383, 'epoch': 3} {'type': 'loss', 'content': 0.10888662934303284, 'timestamp': '2025-09-30 22:36:06.937969', 'step': 19384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:06.968906', 'step': 19384, 'epoch': 3} {'type': 'loss', 'content': 0.03746211156249046, 'timestamp': '2025-09-30 22:36:06.972839', 'step': 19385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.021271', 'step': 19385, 'epoch': 3} {'type': 'loss', 'content': 0.08381436765193939, 'timestamp': '2025-09-30 22:36:07.025758', 'step': 19386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:07.066589', 'step': 19386, 'epoch': 3} {'type': 'loss', 'content': 0.10113618522882462, 'timestamp': '2025-09-30 22:36:07.071514', 'step': 19387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:36:07.104022', 'step': 19387, 'epoch': 3} {'type': 'loss', 'content': 0.05258376523852348, 'timestamp': '2025-09-30 22:36:07.132136', 'step': 19388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.163347', 'step': 19388, 'epoch': 3} {'type': 'loss', 'content': 0.024256275966763496, 'timestamp': '2025-09-30 22:36:07.167102', 'step': 19389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:07.197620', 'step': 19389, 'epoch': 3} {'type': 'loss', 'content': 0.08010393381118774, 'timestamp': '2025-09-30 22:36:07.200465', 'step': 19390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.243900', 'step': 19390, 'epoch': 3} {'type': 'loss', 'content': 0.061727333813905716, 'timestamp': '2025-09-30 22:36:07.246718', 'step': 19391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.287344', 'step': 19391, 'epoch': 3} {'type': 'loss', 'content': 0.08004087954759598, 'timestamp': '2025-09-30 22:36:07.318373', 'step': 19392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.355722', 'step': 19392, 'epoch': 3} {'type': 'loss', 'content': 0.12957824766635895, 'timestamp': '2025-09-30 22:36:07.360400', 'step': 19393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:07.403222', 'step': 19393, 'epoch': 3} {'type': 'loss', 'content': 0.07854298502206802, 'timestamp': '2025-09-30 22:36:07.405635', 'step': 19394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.438496', 'step': 19394, 'epoch': 3} {'type': 'loss', 'content': 0.04255963861942291, 'timestamp': '2025-09-30 22:36:07.454754', 'step': 19395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:07.487163', 'step': 19395, 'epoch': 3} {'type': 'loss', 'content': 0.039206940680742264, 'timestamp': '2025-09-30 22:36:07.513033', 'step': 19396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.544018', 'step': 19396, 'epoch': 3} {'type': 'loss', 'content': 0.03630698099732399, 'timestamp': '2025-09-30 22:36:07.560955', 'step': 19397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.600756', 'step': 19397, 'epoch': 3} {'type': 'loss', 'content': 0.042703550308942795, 'timestamp': '2025-09-30 22:36:07.605227', 'step': 19398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.640440', 'step': 19398, 'epoch': 3} {'type': 'loss', 'content': 0.0341443195939064, 'timestamp': '2025-09-30 22:36:07.646300', 'step': 19399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:07.678482', 'step': 19399, 'epoch': 3} {'type': 'loss', 'content': 0.10213901102542877, 'timestamp': '2025-09-30 22:36:07.704335', 'step': 19400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.741696', 'step': 19400, 'epoch': 3} {'type': 'loss', 'content': 0.08851480484008789, 'timestamp': '2025-09-30 22:36:07.744998', 'step': 19401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.777533', 'step': 19401, 'epoch': 3} {'type': 'loss', 'content': 0.02389904111623764, 'timestamp': '2025-09-30 22:36:07.782306', 'step': 19402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.814736', 'step': 19402, 'epoch': 3} {'type': 'loss', 'content': 0.08494998514652252, 'timestamp': '2025-09-30 22:36:07.817341', 'step': 19403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:07.851852', 'step': 19403, 'epoch': 3} {'type': 'loss', 'content': 0.11287269741296768, 'timestamp': '2025-09-30 22:36:07.875996', 'step': 19404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.908270', 'step': 19404, 'epoch': 3} {'type': 'loss', 'content': 0.0168332327157259, 'timestamp': '2025-09-30 22:36:07.914573', 'step': 19405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:07.948295', 'step': 19405, 'epoch': 3} {'type': 'loss', 'content': 0.08334047347307205, 'timestamp': '2025-09-30 22:36:07.951301', 'step': 19406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:07.982575', 'step': 19406, 'epoch': 3} {'type': 'loss', 'content': 0.10816044360399246, 'timestamp': '2025-09-30 22:36:07.986619', 'step': 19407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:08.027206', 'step': 19407, 'epoch': 3} {'type': 'loss', 'content': 0.05825936049222946, 'timestamp': '2025-09-30 22:36:08.063766', 'step': 19408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.101388', 'step': 19408, 'epoch': 3} {'type': 'loss', 'content': 0.04685863107442856, 'timestamp': '2025-09-30 22:36:08.107984', 'step': 19409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:08.146216', 'step': 19409, 'epoch': 3} {'type': 'loss', 'content': 0.06419217586517334, 'timestamp': '2025-09-30 22:36:08.149650', 'step': 19410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.181005', 'step': 19410, 'epoch': 3} {'type': 'loss', 'content': 0.04675139859318733, 'timestamp': '2025-09-30 22:36:08.185323', 'step': 19411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.221145', 'step': 19411, 'epoch': 3} {'type': 'loss', 'content': 0.03952290490269661, 'timestamp': '2025-09-30 22:36:08.247495', 'step': 19412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.281650', 'step': 19412, 'epoch': 3} {'type': 'loss', 'content': 0.08790702372789383, 'timestamp': '2025-09-30 22:36:08.287281', 'step': 19413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:08.322165', 'step': 19413, 'epoch': 3} {'type': 'loss', 'content': 0.021875521168112755, 'timestamp': '2025-09-30 22:36:08.325576', 'step': 19414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.362682', 'step': 19414, 'epoch': 3} {'type': 'loss', 'content': 0.06291382759809494, 'timestamp': '2025-09-30 22:36:08.367126', 'step': 19415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.399265', 'step': 19415, 'epoch': 3} {'type': 'loss', 'content': 0.06595951318740845, 'timestamp': '2025-09-30 22:36:08.436341', 'step': 19416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:08.476163', 'step': 19416, 'epoch': 3} {'type': 'loss', 'content': 0.12198968976736069, 'timestamp': '2025-09-30 22:36:08.492491', 'step': 19417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.527422', 'step': 19417, 'epoch': 3} {'type': 'loss', 'content': 0.07485678791999817, 'timestamp': '2025-09-30 22:36:08.529925', 'step': 19418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.565459', 'step': 19418, 'epoch': 3} {'type': 'loss', 'content': 0.09888433665037155, 'timestamp': '2025-09-30 22:36:08.569863', 'step': 19419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:08.612212', 'step': 19419, 'epoch': 3} {'type': 'loss', 'content': 0.05484385043382645, 'timestamp': '2025-09-30 22:36:08.642279', 'step': 19420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.677730', 'step': 19420, 'epoch': 3} {'type': 'loss', 'content': 0.10073717683553696, 'timestamp': '2025-09-30 22:36:08.684465', 'step': 19421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:08.715339', 'step': 19421, 'epoch': 3} {'type': 'loss', 'content': 0.053807951509952545, 'timestamp': '2025-09-30 22:36:08.727486', 'step': 19422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.771685', 'step': 19422, 'epoch': 3} {'type': 'loss', 'content': 0.12947431206703186, 'timestamp': '2025-09-30 22:36:08.780602', 'step': 19423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:08.819300', 'step': 19423, 'epoch': 3} {'type': 'loss', 'content': 0.03987526521086693, 'timestamp': '2025-09-30 22:36:08.847484', 'step': 19424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.897575', 'step': 19424, 'epoch': 3} {'type': 'loss', 'content': 0.06853250414133072, 'timestamp': '2025-09-30 22:36:08.900982', 'step': 19425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.930863', 'step': 19425, 'epoch': 3} {'type': 'loss', 'content': 0.07816576212644577, 'timestamp': '2025-09-30 22:36:08.934120', 'step': 19426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:08.965489', 'step': 19426, 'epoch': 3} {'type': 'loss', 'content': 0.08478367328643799, 'timestamp': '2025-09-30 22:36:08.971019', 'step': 19427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:09.005620', 'step': 19427, 'epoch': 3} {'type': 'loss', 'content': 0.08720850944519043, 'timestamp': '2025-09-30 22:36:09.031673', 'step': 19428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:09.070587', 'step': 19428, 'epoch': 3} {'type': 'loss', 'content': 0.04661053791642189, 'timestamp': '2025-09-30 22:36:09.074930', 'step': 19429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.105483', 'step': 19429, 'epoch': 3} {'type': 'loss', 'content': 0.1188880130648613, 'timestamp': '2025-09-30 22:36:09.110097', 'step': 19430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:09.144086', 'step': 19430, 'epoch': 3} {'type': 'loss', 'content': 0.07063847035169601, 'timestamp': '2025-09-30 22:36:09.146931', 'step': 19431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:09.179716', 'step': 19431, 'epoch': 3} {'type': 'loss', 'content': 0.09893617033958435, 'timestamp': '2025-09-30 22:36:09.204355', 'step': 19432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:09.238436', 'step': 19432, 'epoch': 3} {'type': 'loss', 'content': 0.08432471752166748, 'timestamp': '2025-09-30 22:36:09.241398', 'step': 19433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.279813', 'step': 19433, 'epoch': 3} {'type': 'loss', 'content': 0.06409477442502975, 'timestamp': '2025-09-30 22:36:09.294575', 'step': 19434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.337468', 'step': 19434, 'epoch': 3} {'type': 'loss', 'content': 0.08723283559083939, 'timestamp': '2025-09-30 22:36:09.352771', 'step': 19435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.393162', 'step': 19435, 'epoch': 3} {'type': 'loss', 'content': 0.09275289624929428, 'timestamp': '2025-09-30 22:36:09.428217', 'step': 19436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:09.477417', 'step': 19436, 'epoch': 3} {'type': 'loss', 'content': 0.05031612142920494, 'timestamp': '2025-09-30 22:36:09.481844', 'step': 19437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.513279', 'step': 19437, 'epoch': 3} {'type': 'loss', 'content': 0.05921373516321182, 'timestamp': '2025-09-30 22:36:09.522602', 'step': 19438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.556770', 'step': 19438, 'epoch': 3} {'type': 'loss', 'content': 0.06958482414484024, 'timestamp': '2025-09-30 22:36:09.569343', 'step': 19439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:09.600608', 'step': 19439, 'epoch': 3} {'type': 'loss', 'content': 0.08086218684911728, 'timestamp': '2025-09-30 22:36:09.625173', 'step': 19440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:09.670074', 'step': 19440, 'epoch': 3} {'type': 'loss', 'content': 0.04840916395187378, 'timestamp': '2025-09-30 22:36:09.678326', 'step': 19441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:09.711586', 'step': 19441, 'epoch': 3} {'type': 'loss', 'content': 0.09088991582393646, 'timestamp': '2025-09-30 22:36:09.715328', 'step': 19442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:09.748540', 'step': 19442, 'epoch': 3} {'type': 'loss', 'content': 0.07225580513477325, 'timestamp': '2025-09-30 22:36:09.752432', 'step': 19443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:09.785524', 'step': 19443, 'epoch': 3} {'type': 'loss', 'content': 0.05222555622458458, 'timestamp': '2025-09-30 22:36:09.809597', 'step': 19444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:09.841844', 'step': 19444, 'epoch': 3} {'type': 'loss', 'content': 0.015298430807888508, 'timestamp': '2025-09-30 22:36:09.847108', 'step': 19445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:09.879746', 'step': 19445, 'epoch': 3} {'type': 'loss', 'content': 0.05865047127008438, 'timestamp': '2025-09-30 22:36:09.883139', 'step': 19446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:09.913637', 'step': 19446, 'epoch': 3} {'type': 'loss', 'content': 0.09834352135658264, 'timestamp': '2025-09-30 22:36:09.916699', 'step': 19447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:09.961060', 'step': 19447, 'epoch': 3} {'type': 'loss', 'content': 0.034456364810466766, 'timestamp': '2025-09-30 22:36:09.985388', 'step': 19448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:10.017944', 'step': 19448, 'epoch': 3} {'type': 'loss', 'content': 0.0699307769536972, 'timestamp': '2025-09-30 22:36:10.030936', 'step': 19449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:10.062495', 'step': 19449, 'epoch': 3} {'type': 'loss', 'content': 0.07993824779987335, 'timestamp': '2025-09-30 22:36:10.067115', 'step': 19450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:10.102743', 'step': 19450, 'epoch': 3} {'type': 'loss', 'content': 0.04014226049184799, 'timestamp': '2025-09-30 22:36:10.105681', 'step': 19451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:10.137281', 'step': 19451, 'epoch': 3} {'type': 'loss', 'content': 0.07187439501285553, 'timestamp': '2025-09-30 22:36:10.162997', 'step': 19452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:10.193740', 'step': 19452, 'epoch': 3} {'type': 'loss', 'content': 0.03908921405673027, 'timestamp': '2025-09-30 22:36:10.198043', 'step': 19453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:10.243340', 'step': 19453, 'epoch': 3} {'type': 'loss', 'content': 0.15763495862483978, 'timestamp': '2025-09-30 22:36:10.260236', 'step': 19454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.292438', 'step': 19454, 'epoch': 3} {'type': 'loss', 'content': 0.08335309475660324, 'timestamp': '2025-09-30 22:36:10.296389', 'step': 19455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:10.328060', 'step': 19455, 'epoch': 3} {'type': 'loss', 'content': 0.09003414958715439, 'timestamp': '2025-09-30 22:36:10.358471', 'step': 19456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.397012', 'step': 19456, 'epoch': 3} {'type': 'loss', 'content': 0.05502535402774811, 'timestamp': '2025-09-30 22:36:10.411305', 'step': 19457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:10.443811', 'step': 19457, 'epoch': 3} {'type': 'loss', 'content': 0.11163122206926346, 'timestamp': '2025-09-30 22:36:10.446858', 'step': 19458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.487736', 'step': 19458, 'epoch': 3} {'type': 'loss', 'content': 0.05501995235681534, 'timestamp': '2025-09-30 22:36:10.493043', 'step': 19459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:10.525578', 'step': 19459, 'epoch': 3} {'type': 'loss', 'content': 0.04093502461910248, 'timestamp': '2025-09-30 22:36:10.564183', 'step': 19460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:10.602187', 'step': 19460, 'epoch': 3} {'type': 'loss', 'content': 0.11704465001821518, 'timestamp': '2025-09-30 22:36:10.615722', 'step': 19461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:36:10.669829', 'step': 19461, 'epoch': 3} {'type': 'loss', 'content': 0.033027153462171555, 'timestamp': '2025-09-30 22:36:10.683458', 'step': 19462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.729724', 'step': 19462, 'epoch': 3} {'type': 'loss', 'content': 0.12043517827987671, 'timestamp': '2025-09-30 22:36:10.741243', 'step': 19463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.773232', 'step': 19463, 'epoch': 3} {'type': 'loss', 'content': 0.02775081805884838, 'timestamp': '2025-09-30 22:36:10.807983', 'step': 19464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.837907', 'step': 19464, 'epoch': 3} {'type': 'loss', 'content': 0.11925028264522552, 'timestamp': '2025-09-30 22:36:10.842005', 'step': 19465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.876608', 'step': 19465, 'epoch': 3} {'type': 'loss', 'content': 0.03606852516531944, 'timestamp': '2025-09-30 22:36:10.887332', 'step': 19466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.925774', 'step': 19466, 'epoch': 3} {'type': 'loss', 'content': 0.07244051247835159, 'timestamp': '2025-09-30 22:36:10.935112', 'step': 19467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:10.971780', 'step': 19467, 'epoch': 3} {'type': 'loss', 'content': 0.08178200572729111, 'timestamp': '2025-09-30 22:36:10.998496', 'step': 19468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:11.031708', 'step': 19468, 'epoch': 3} {'type': 'loss', 'content': 0.0564412921667099, 'timestamp': '2025-09-30 22:36:11.036327', 'step': 19469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:11.077615', 'step': 19469, 'epoch': 3} {'type': 'loss', 'content': 0.09230255335569382, 'timestamp': '2025-09-30 22:36:11.080488', 'step': 19470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.113328', 'step': 19470, 'epoch': 3} {'type': 'loss', 'content': 0.021492458879947662, 'timestamp': '2025-09-30 22:36:11.127301', 'step': 19471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:11.161274', 'step': 19471, 'epoch': 3} {'type': 'loss', 'content': 0.02940528467297554, 'timestamp': '2025-09-30 22:36:11.186565', 'step': 19472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.226274', 'step': 19472, 'epoch': 3} {'type': 'loss', 'content': 0.05820881575345993, 'timestamp': '2025-09-30 22:36:11.230946', 'step': 19473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.262797', 'step': 19473, 'epoch': 3} {'type': 'loss', 'content': 0.05816895142197609, 'timestamp': '2025-09-30 22:36:11.281241', 'step': 19474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:11.321005', 'step': 19474, 'epoch': 3} {'type': 'loss', 'content': 0.06947395950555801, 'timestamp': '2025-09-30 22:36:11.328227', 'step': 19475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:11.365281', 'step': 19475, 'epoch': 3} {'type': 'loss', 'content': 0.04425659403204918, 'timestamp': '2025-09-30 22:36:11.390447', 'step': 19476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:11.421616', 'step': 19476, 'epoch': 3} {'type': 'loss', 'content': 0.11304921656847, 'timestamp': '2025-09-30 22:36:11.428814', 'step': 19477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:11.460270', 'step': 19477, 'epoch': 3} {'type': 'loss', 'content': 0.0390414223074913, 'timestamp': '2025-09-30 22:36:11.465747', 'step': 19478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:11.497162', 'step': 19478, 'epoch': 3} {'type': 'loss', 'content': 0.023808281868696213, 'timestamp': '2025-09-30 22:36:11.505395', 'step': 19479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.544884', 'step': 19479, 'epoch': 3} {'type': 'loss', 'content': 0.05205678194761276, 'timestamp': '2025-09-30 22:36:11.572777', 'step': 19480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:11.606791', 'step': 19480, 'epoch': 3} {'type': 'loss', 'content': 0.03236835077404976, 'timestamp': '2025-09-30 22:36:11.610655', 'step': 19481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:11.648701', 'step': 19481, 'epoch': 3} {'type': 'loss', 'content': 0.041079770773649216, 'timestamp': '2025-09-30 22:36:11.651587', 'step': 19482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:11.683555', 'step': 19482, 'epoch': 3} {'type': 'loss', 'content': 0.06329408288002014, 'timestamp': '2025-09-30 22:36:11.686778', 'step': 19483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.725178', 'step': 19483, 'epoch': 3} {'type': 'loss', 'content': 0.10254417359828949, 'timestamp': '2025-09-30 22:36:11.750789', 'step': 19484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.787267', 'step': 19484, 'epoch': 3} {'type': 'loss', 'content': 0.07653006166219711, 'timestamp': '2025-09-30 22:36:11.790731', 'step': 19485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:11.822695', 'step': 19485, 'epoch': 3} {'type': 'loss', 'content': 0.03796692565083504, 'timestamp': '2025-09-30 22:36:11.829374', 'step': 19486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.865033', 'step': 19486, 'epoch': 3} {'type': 'loss', 'content': 0.08303148299455643, 'timestamp': '2025-09-30 22:36:11.877245', 'step': 19487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.911654', 'step': 19487, 'epoch': 3} {'type': 'loss', 'content': 0.02526891976594925, 'timestamp': '2025-09-30 22:36:11.939294', 'step': 19488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:11.972318', 'step': 19488, 'epoch': 3} {'type': 'loss', 'content': 0.054615866392850876, 'timestamp': '2025-09-30 22:36:11.979117', 'step': 19489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:12.015314', 'step': 19489, 'epoch': 3} {'type': 'loss', 'content': 0.06252745538949966, 'timestamp': '2025-09-30 22:36:12.025300', 'step': 19490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:12.060171', 'step': 19490, 'epoch': 3} {'type': 'loss', 'content': 0.016402820125222206, 'timestamp': '2025-09-30 22:36:12.067737', 'step': 19491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:12.105719', 'step': 19491, 'epoch': 3} {'type': 'loss', 'content': 0.05204203352332115, 'timestamp': '2025-09-30 22:36:12.130367', 'step': 19492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:12.162419', 'step': 19492, 'epoch': 3} {'type': 'loss', 'content': 0.11448934674263, 'timestamp': '2025-09-30 22:36:12.176107', 'step': 19493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:12.209681', 'step': 19493, 'epoch': 3} {'type': 'loss', 'content': 0.056732192635536194, 'timestamp': '2025-09-30 22:36:12.216814', 'step': 19494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:12.250895', 'step': 19494, 'epoch': 3} {'type': 'loss', 'content': 0.06520570814609528, 'timestamp': '2025-09-30 22:36:12.254232', 'step': 19495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:12.287338', 'step': 19495, 'epoch': 3} {'type': 'loss', 'content': 0.019313612952828407, 'timestamp': '2025-09-30 22:36:12.313325', 'step': 19496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:12.345332', 'step': 19496, 'epoch': 3} {'type': 'loss', 'content': 0.04053998365998268, 'timestamp': '2025-09-30 22:36:12.353051', 'step': 19497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:12.398217', 'step': 19497, 'epoch': 3} {'type': 'loss', 'content': 0.06500010192394257, 'timestamp': '2025-09-30 22:36:12.400627', 'step': 19498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:12.431913', 'step': 19498, 'epoch': 3} {'type': 'loss', 'content': 0.023892128840088844, 'timestamp': '2025-09-30 22:36:12.435736', 'step': 19499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:12.471101', 'step': 19499, 'epoch': 3} {'type': 'loss', 'content': 0.04405001550912857, 'timestamp': '2025-09-30 22:36:12.495437', 'step': 19500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 19500', 'timestamp': '2025-09-30 22:36:17.701925', 'step': 19500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:17.742235', 'step': 19500, 'epoch': 3} {'type': 'loss', 'content': 0.035792235285043716, 'timestamp': '2025-09-30 22:36:17.747417', 'step': 19501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:17.781739', 'step': 19501, 'epoch': 3} {'type': 'loss', 'content': 0.11413674801588058, 'timestamp': '2025-09-30 22:36:17.787620', 'step': 19502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:17.820870', 'step': 19502, 'epoch': 3} {'type': 'loss', 'content': 0.0903976559638977, 'timestamp': '2025-09-30 22:36:17.828914', 'step': 19503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:17.862779', 'step': 19503, 'epoch': 3} {'type': 'loss', 'content': 0.04853234067559242, 'timestamp': '2025-09-30 22:36:17.888750', 'step': 19504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:17.931454', 'step': 19504, 'epoch': 3} {'type': 'loss', 'content': 0.07890534400939941, 'timestamp': '2025-09-30 22:36:17.945099', 'step': 19505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:17.977693', 'step': 19505, 'epoch': 3} {'type': 'loss', 'content': 0.03465631976723671, 'timestamp': '2025-09-30 22:36:17.982224', 'step': 19506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:18.014309', 'step': 19506, 'epoch': 3} {'type': 'loss', 'content': 0.034614790230989456, 'timestamp': '2025-09-30 22:36:18.017958', 'step': 19507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:18.054824', 'step': 19507, 'epoch': 3} {'type': 'loss', 'content': 0.0723370909690857, 'timestamp': '2025-09-30 22:36:18.079985', 'step': 19508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:18.110681', 'step': 19508, 'epoch': 3} {'type': 'loss', 'content': 0.07264792174100876, 'timestamp': '2025-09-30 22:36:18.118312', 'step': 19509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:18.152232', 'step': 19509, 'epoch': 3} {'type': 'loss', 'content': 0.08644550293684006, 'timestamp': '2025-09-30 22:36:18.154762', 'step': 19510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:18.189184', 'step': 19510, 'epoch': 3} {'type': 'loss', 'content': 0.012678844854235649, 'timestamp': '2025-09-30 22:36:18.193458', 'step': 19511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:36:18.225308', 'step': 19511, 'epoch': 3} {'type': 'loss', 'content': 0.03560936823487282, 'timestamp': '2025-09-30 22:36:18.252434', 'step': 19512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:18.283813', 'step': 19512, 'epoch': 3} {'type': 'loss', 'content': 0.03935287520289421, 'timestamp': '2025-09-30 22:36:18.289054', 'step': 19513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:18.322684', 'step': 19513, 'epoch': 3} {'type': 'loss', 'content': 0.14412787556648254, 'timestamp': '2025-09-30 22:36:18.336733', 'step': 19514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:18.367609', 'step': 19514, 'epoch': 3} {'type': 'loss', 'content': 0.048988960683345795, 'timestamp': '2025-09-30 22:36:18.375489', 'step': 19515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:18.413715', 'step': 19515, 'epoch': 3} {'type': 'loss', 'content': 0.03685902804136276, 'timestamp': '2025-09-30 22:36:18.440819', 'step': 19516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:18.477870', 'step': 19516, 'epoch': 3} {'type': 'loss', 'content': 0.028544152155518532, 'timestamp': '2025-09-30 22:36:18.482767', 'step': 19517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:18.519459', 'step': 19517, 'epoch': 3} {'type': 'loss', 'content': 0.040778063237667084, 'timestamp': '2025-09-30 22:36:18.525411', 'step': 19518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:18.560168', 'step': 19518, 'epoch': 3} {'type': 'loss', 'content': 0.08065720647573471, 'timestamp': '2025-09-30 22:36:18.573884', 'step': 19519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:18.604464', 'step': 19519, 'epoch': 3} {'type': 'loss', 'content': 0.10755996406078339, 'timestamp': '2025-09-30 22:36:18.633409', 'step': 19520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:18.674333', 'step': 19520, 'epoch': 3} {'type': 'loss', 'content': 0.06378521770238876, 'timestamp': '2025-09-30 22:36:18.680249', 'step': 19521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:18.713417', 'step': 19521, 'epoch': 3} {'type': 'loss', 'content': 0.06744381785392761, 'timestamp': '2025-09-30 22:36:18.716109', 'step': 19522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:18.747320', 'step': 19522, 'epoch': 3} {'type': 'loss', 'content': 0.10701006650924683, 'timestamp': '2025-09-30 22:36:18.752505', 'step': 19523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:18.784487', 'step': 19523, 'epoch': 3} {'type': 'loss', 'content': 0.06266824901103973, 'timestamp': '2025-09-30 22:36:18.809401', 'step': 19524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:18.840494', 'step': 19524, 'epoch': 3} {'type': 'loss', 'content': 0.05610877647995949, 'timestamp': '2025-09-30 22:36:18.844484', 'step': 19525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:18.877031', 'step': 19525, 'epoch': 3} {'type': 'loss', 'content': 0.06826766580343246, 'timestamp': '2025-09-30 22:36:18.880600', 'step': 19526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:18.915104', 'step': 19526, 'epoch': 3} {'type': 'loss', 'content': 0.07107651233673096, 'timestamp': '2025-09-30 22:36:18.925986', 'step': 19527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:18.962801', 'step': 19527, 'epoch': 3} {'type': 'loss', 'content': 0.02609640546143055, 'timestamp': '2025-09-30 22:36:18.987668', 'step': 19528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.021125', 'step': 19528, 'epoch': 3} {'type': 'loss', 'content': 0.021852927282452583, 'timestamp': '2025-09-30 22:36:19.025707', 'step': 19529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:19.066187', 'step': 19529, 'epoch': 3} {'type': 'loss', 'content': 0.06819113343954086, 'timestamp': '2025-09-30 22:36:19.070047', 'step': 19530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.101087', 'step': 19530, 'epoch': 3} {'type': 'loss', 'content': 0.06522859632968903, 'timestamp': '2025-09-30 22:36:19.107078', 'step': 19531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.142113', 'step': 19531, 'epoch': 3} {'type': 'loss', 'content': 0.09486480057239532, 'timestamp': '2025-09-30 22:36:19.168012', 'step': 19532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.202715', 'step': 19532, 'epoch': 3} {'type': 'loss', 'content': 0.13884085416793823, 'timestamp': '2025-09-30 22:36:19.207887', 'step': 19533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.244266', 'step': 19533, 'epoch': 3} {'type': 'loss', 'content': 0.05238990858197212, 'timestamp': '2025-09-30 22:36:19.248380', 'step': 19534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:19.283565', 'step': 19534, 'epoch': 3} {'type': 'loss', 'content': 0.05598687008023262, 'timestamp': '2025-09-30 22:36:19.289448', 'step': 19535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:19.332620', 'step': 19535, 'epoch': 3} {'type': 'loss', 'content': 0.01938113383948803, 'timestamp': '2025-09-30 22:36:19.363535', 'step': 19536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:19.394331', 'step': 19536, 'epoch': 3} {'type': 'loss', 'content': 0.03036290593445301, 'timestamp': '2025-09-30 22:36:19.396498', 'step': 19537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:19.439312', 'step': 19537, 'epoch': 3} {'type': 'loss', 'content': 0.009815709665417671, 'timestamp': '2025-09-30 22:36:19.445543', 'step': 19538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:19.480525', 'step': 19538, 'epoch': 3} {'type': 'loss', 'content': 0.04077853634953499, 'timestamp': '2025-09-30 22:36:19.483060', 'step': 19539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.513251', 'step': 19539, 'epoch': 3} {'type': 'loss', 'content': 0.11277274042367935, 'timestamp': '2025-09-30 22:36:19.544066', 'step': 19540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.577699', 'step': 19540, 'epoch': 3} {'type': 'loss', 'content': 0.05202513188123703, 'timestamp': '2025-09-30 22:36:19.580188', 'step': 19541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:19.612531', 'step': 19541, 'epoch': 3} {'type': 'loss', 'content': 0.0462227389216423, 'timestamp': '2025-09-30 22:36:19.614904', 'step': 19542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.649111', 'step': 19542, 'epoch': 3} {'type': 'loss', 'content': 0.054075341671705246, 'timestamp': '2025-09-30 22:36:19.651292', 'step': 19543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:19.698566', 'step': 19543, 'epoch': 3} {'type': 'loss', 'content': 0.020101770758628845, 'timestamp': '2025-09-30 22:36:19.722990', 'step': 19544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:19.755591', 'step': 19544, 'epoch': 3} {'type': 'loss', 'content': 0.1420438438653946, 'timestamp': '2025-09-30 22:36:19.758471', 'step': 19545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:19.792492', 'step': 19545, 'epoch': 3} {'type': 'loss', 'content': 0.1241520494222641, 'timestamp': '2025-09-30 22:36:19.795846', 'step': 19546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:19.828493', 'step': 19546, 'epoch': 3} {'type': 'loss', 'content': 0.02465817704796791, 'timestamp': '2025-09-30 22:36:19.831106', 'step': 19547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:19.862209', 'step': 19547, 'epoch': 3} {'type': 'loss', 'content': 0.0057721384800970554, 'timestamp': '2025-09-30 22:36:19.886707', 'step': 19548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:19.920317', 'step': 19548, 'epoch': 3} {'type': 'loss', 'content': 0.07467465102672577, 'timestamp': '2025-09-30 22:36:19.924933', 'step': 19549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:19.967657', 'step': 19549, 'epoch': 3} {'type': 'loss', 'content': 0.08076909929513931, 'timestamp': '2025-09-30 22:36:19.972154', 'step': 19550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.004404', 'step': 19550, 'epoch': 3} {'type': 'loss', 'content': 0.030171064659953117, 'timestamp': '2025-09-30 22:36:20.009921', 'step': 19551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.041730', 'step': 19551, 'epoch': 3} {'type': 'loss', 'content': 0.06871405988931656, 'timestamp': '2025-09-30 22:36:20.066735', 'step': 19552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:20.105775', 'step': 19552, 'epoch': 3} {'type': 'loss', 'content': 0.08519580960273743, 'timestamp': '2025-09-30 22:36:20.112243', 'step': 19553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.148743', 'step': 19553, 'epoch': 3} {'type': 'loss', 'content': 0.02978251501917839, 'timestamp': '2025-09-30 22:36:20.165833', 'step': 19554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:20.216890', 'step': 19554, 'epoch': 3} {'type': 'loss', 'content': 0.04061847925186157, 'timestamp': '2025-09-30 22:36:20.220798', 'step': 19555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:20.252103', 'step': 19555, 'epoch': 3} {'type': 'loss', 'content': 0.03168315067887306, 'timestamp': '2025-09-30 22:36:20.282056', 'step': 19556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:20.312589', 'step': 19556, 'epoch': 3} {'type': 'loss', 'content': 0.09843698143959045, 'timestamp': '2025-09-30 22:36:20.319618', 'step': 19557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.351495', 'step': 19557, 'epoch': 3} {'type': 'loss', 'content': 0.08760631084442139, 'timestamp': '2025-09-30 22:36:20.353911', 'step': 19558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.384599', 'step': 19558, 'epoch': 3} {'type': 'loss', 'content': 0.05431349575519562, 'timestamp': '2025-09-30 22:36:20.387563', 'step': 19559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:20.418306', 'step': 19559, 'epoch': 3} {'type': 'loss', 'content': 0.13495147228240967, 'timestamp': '2025-09-30 22:36:20.444964', 'step': 19560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.479426', 'step': 19560, 'epoch': 3} {'type': 'loss', 'content': 0.04699907824397087, 'timestamp': '2025-09-30 22:36:20.488413', 'step': 19561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:20.520309', 'step': 19561, 'epoch': 3} {'type': 'loss', 'content': 0.01576284132897854, 'timestamp': '2025-09-30 22:36:20.523005', 'step': 19562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.554016', 'step': 19562, 'epoch': 3} {'type': 'loss', 'content': 0.03212524205446243, 'timestamp': '2025-09-30 22:36:20.560015', 'step': 19563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:20.593663', 'step': 19563, 'epoch': 3} {'type': 'loss', 'content': 0.04923489689826965, 'timestamp': '2025-09-30 22:36:20.618730', 'step': 19564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:20.650097', 'step': 19564, 'epoch': 3} {'type': 'loss', 'content': 0.08930686116218567, 'timestamp': '2025-09-30 22:36:20.654818', 'step': 19565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:20.687366', 'step': 19565, 'epoch': 3} {'type': 'loss', 'content': 0.024111256003379822, 'timestamp': '2025-09-30 22:36:20.695306', 'step': 19566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:20.725180', 'step': 19566, 'epoch': 3} {'type': 'loss', 'content': 0.11487787961959839, 'timestamp': '2025-09-30 22:36:20.728571', 'step': 19567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:20.759323', 'step': 19567, 'epoch': 3} {'type': 'loss', 'content': 0.04251571744680405, 'timestamp': '2025-09-30 22:36:20.784293', 'step': 19568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:20.816165', 'step': 19568, 'epoch': 3} {'type': 'loss', 'content': 0.007158988155424595, 'timestamp': '2025-09-30 22:36:20.820230', 'step': 19569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:20.853586', 'step': 19569, 'epoch': 3} {'type': 'loss', 'content': 0.05919273570179939, 'timestamp': '2025-09-30 22:36:20.857023', 'step': 19570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:20.896100', 'step': 19570, 'epoch': 3} {'type': 'loss', 'content': 0.12316886335611343, 'timestamp': '2025-09-30 22:36:20.900320', 'step': 19571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:20.930667', 'step': 19571, 'epoch': 3} {'type': 'loss', 'content': 0.05032283812761307, 'timestamp': '2025-09-30 22:36:20.957292', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:36:28.307557', 'step': 19572, 'epoch': 3} {'type': 'pplx', 'content': 9520.282593771502, 'timestamp': '2025-09-30 22:36:28.311143', 'step': 19572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:28.340499', 'step': 19572, 'epoch': 3} {'type': 'loss', 'content': 0.07143961638212204, 'timestamp': '2025-09-30 22:36:28.347085', 'step': 19573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.377867', 'step': 19573, 'epoch': 3} {'type': 'loss', 'content': 0.10538722574710846, 'timestamp': '2025-09-30 22:36:28.384783', 'step': 19574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:28.416448', 'step': 19574, 'epoch': 3} {'type': 'loss', 'content': 0.08707033842802048, 'timestamp': '2025-09-30 22:36:28.419057', 'step': 19575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:28.458931', 'step': 19575, 'epoch': 3} {'type': 'loss', 'content': 0.029977789148688316, 'timestamp': '2025-09-30 22:36:28.483571', 'step': 19576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.525414', 'step': 19576, 'epoch': 3} {'type': 'loss', 'content': 0.04471307992935181, 'timestamp': '2025-09-30 22:36:28.528820', 'step': 19577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.561250', 'step': 19577, 'epoch': 3} {'type': 'loss', 'content': 0.14273899793624878, 'timestamp': '2025-09-30 22:36:28.570216', 'step': 19578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:28.606650', 'step': 19578, 'epoch': 3} {'type': 'loss', 'content': 0.06508380174636841, 'timestamp': '2025-09-30 22:36:28.613905', 'step': 19579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:28.650021', 'step': 19579, 'epoch': 3} {'type': 'loss', 'content': 0.05655137076973915, 'timestamp': '2025-09-30 22:36:28.675280', 'step': 19580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.706879', 'step': 19580, 'epoch': 3} {'type': 'loss', 'content': 0.028171682730317116, 'timestamp': '2025-09-30 22:36:28.709907', 'step': 19581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.740457', 'step': 19581, 'epoch': 3} {'type': 'loss', 'content': 0.04764014482498169, 'timestamp': '2025-09-30 22:36:28.745030', 'step': 19582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:28.776784', 'step': 19582, 'epoch': 3} {'type': 'loss', 'content': 0.06502944231033325, 'timestamp': '2025-09-30 22:36:28.787472', 'step': 19583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.818764', 'step': 19583, 'epoch': 3} {'type': 'loss', 'content': 0.048288341611623764, 'timestamp': '2025-09-30 22:36:28.843837', 'step': 19584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.874311', 'step': 19584, 'epoch': 3} {'type': 'loss', 'content': 0.03536071255803108, 'timestamp': '2025-09-30 22:36:28.877563', 'step': 19585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.909196', 'step': 19585, 'epoch': 3} {'type': 'loss', 'content': 0.04855137690901756, 'timestamp': '2025-09-30 22:36:28.911472', 'step': 19586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:28.941542', 'step': 19586, 'epoch': 3} {'type': 'loss', 'content': 0.06547176837921143, 'timestamp': '2025-09-30 22:36:28.947540', 'step': 19587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:28.984157', 'step': 19587, 'epoch': 3} {'type': 'loss', 'content': 0.08109910786151886, 'timestamp': '2025-09-30 22:36:29.009100', 'step': 19588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:29.043185', 'step': 19588, 'epoch': 3} {'type': 'loss', 'content': 0.05898085981607437, 'timestamp': '2025-09-30 22:36:29.045897', 'step': 19589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:29.079372', 'step': 19589, 'epoch': 3} {'type': 'loss', 'content': 0.059770602732896805, 'timestamp': '2025-09-30 22:36:29.086710', 'step': 19590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.124945', 'step': 19590, 'epoch': 3} {'type': 'loss', 'content': 0.06672168523073196, 'timestamp': '2025-09-30 22:36:29.135140', 'step': 19591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.174923', 'step': 19591, 'epoch': 3} {'type': 'loss', 'content': 0.12358178943395615, 'timestamp': '2025-09-30 22:36:29.204867', 'step': 19592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:29.246443', 'step': 19592, 'epoch': 3} {'type': 'loss', 'content': 0.07259545475244522, 'timestamp': '2025-09-30 22:36:29.254271', 'step': 19593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:29.286457', 'step': 19593, 'epoch': 3} {'type': 'loss', 'content': 0.06390322744846344, 'timestamp': '2025-09-30 22:36:29.296785', 'step': 19594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:29.332493', 'step': 19594, 'epoch': 3} {'type': 'loss', 'content': 0.11006049066781998, 'timestamp': '2025-09-30 22:36:29.335652', 'step': 19595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.366549', 'step': 19595, 'epoch': 3} {'type': 'loss', 'content': 0.06702102720737457, 'timestamp': '2025-09-30 22:36:29.391052', 'step': 19596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:29.422104', 'step': 19596, 'epoch': 3} {'type': 'loss', 'content': 0.10298926383256912, 'timestamp': '2025-09-30 22:36:29.426330', 'step': 19597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.460400', 'step': 19597, 'epoch': 3} {'type': 'loss', 'content': 0.0712859183549881, 'timestamp': '2025-09-30 22:36:29.465269', 'step': 19598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.516098', 'step': 19598, 'epoch': 3} {'type': 'loss', 'content': 0.15274794399738312, 'timestamp': '2025-09-30 22:36:29.519908', 'step': 19599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:29.565156', 'step': 19599, 'epoch': 3} {'type': 'loss', 'content': 0.050188492983579636, 'timestamp': '2025-09-30 22:36:29.591291', 'step': 19600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:29.631619', 'step': 19600, 'epoch': 3} {'type': 'loss', 'content': 0.03037351556122303, 'timestamp': '2025-09-30 22:36:29.643625', 'step': 19601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.683194', 'step': 19601, 'epoch': 3} {'type': 'loss', 'content': 0.09696748107671738, 'timestamp': '2025-09-30 22:36:29.687470', 'step': 19602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:29.718590', 'step': 19602, 'epoch': 3} {'type': 'loss', 'content': 0.005623252131044865, 'timestamp': '2025-09-30 22:36:29.724139', 'step': 19603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:29.755697', 'step': 19603, 'epoch': 3} {'type': 'loss', 'content': 0.08251550793647766, 'timestamp': '2025-09-30 22:36:29.788485', 'step': 19604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:29.835174', 'step': 19604, 'epoch': 3} {'type': 'loss', 'content': 0.032889243215322495, 'timestamp': '2025-09-30 22:36:29.839644', 'step': 19605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.870950', 'step': 19605, 'epoch': 3} {'type': 'loss', 'content': 0.06877095997333527, 'timestamp': '2025-09-30 22:36:29.873454', 'step': 19606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:29.904359', 'step': 19606, 'epoch': 3} {'type': 'loss', 'content': 0.05559014901518822, 'timestamp': '2025-09-30 22:36:29.910323', 'step': 19607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:29.943966', 'step': 19607, 'epoch': 3} {'type': 'loss', 'content': 0.02384539321064949, 'timestamp': '2025-09-30 22:36:29.969095', 'step': 19608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.000545', 'step': 19608, 'epoch': 3} {'type': 'loss', 'content': 0.038799531757831573, 'timestamp': '2025-09-30 22:36:30.007731', 'step': 19609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:30.040408', 'step': 19609, 'epoch': 3} {'type': 'loss', 'content': 0.029010022059082985, 'timestamp': '2025-09-30 22:36:30.047559', 'step': 19610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:30.079095', 'step': 19610, 'epoch': 3} {'type': 'loss', 'content': 0.08001578599214554, 'timestamp': '2025-09-30 22:36:30.082128', 'step': 19611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:30.126045', 'step': 19611, 'epoch': 3} {'type': 'loss', 'content': 0.02532881125807762, 'timestamp': '2025-09-30 22:36:30.150306', 'step': 19612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:30.182157', 'step': 19612, 'epoch': 3} {'type': 'loss', 'content': 0.036821428686380386, 'timestamp': '2025-09-30 22:36:30.190653', 'step': 19613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.225559', 'step': 19613, 'epoch': 3} {'type': 'loss', 'content': 0.12578044831752777, 'timestamp': '2025-09-30 22:36:30.236115', 'step': 19614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.267510', 'step': 19614, 'epoch': 3} {'type': 'loss', 'content': 0.037299372255802155, 'timestamp': '2025-09-30 22:36:30.270837', 'step': 19615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:30.305550', 'step': 19615, 'epoch': 3} {'type': 'loss', 'content': 0.047819241881370544, 'timestamp': '2025-09-30 22:36:30.329816', 'step': 19616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:30.360923', 'step': 19616, 'epoch': 3} {'type': 'loss', 'content': 0.03441084921360016, 'timestamp': '2025-09-30 22:36:30.364544', 'step': 19617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:30.395707', 'step': 19617, 'epoch': 3} {'type': 'loss', 'content': 0.0908677875995636, 'timestamp': '2025-09-30 22:36:30.398232', 'step': 19618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:30.429678', 'step': 19618, 'epoch': 3} {'type': 'loss', 'content': 0.07276575267314911, 'timestamp': '2025-09-30 22:36:30.432133', 'step': 19619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.463798', 'step': 19619, 'epoch': 3} {'type': 'loss', 'content': 0.15433070063591003, 'timestamp': '2025-09-30 22:36:30.488615', 'step': 19620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:30.520828', 'step': 19620, 'epoch': 3} {'type': 'loss', 'content': 0.06784813106060028, 'timestamp': '2025-09-30 22:36:30.523506', 'step': 19621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.555283', 'step': 19621, 'epoch': 3} {'type': 'loss', 'content': 0.05727091804146767, 'timestamp': '2025-09-30 22:36:30.557601', 'step': 19622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:30.588754', 'step': 19622, 'epoch': 3} {'type': 'loss', 'content': 0.035194557160139084, 'timestamp': '2025-09-30 22:36:30.593404', 'step': 19623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:30.624977', 'step': 19623, 'epoch': 3} {'type': 'loss', 'content': 0.06239253655076027, 'timestamp': '2025-09-30 22:36:30.650157', 'step': 19624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.682494', 'step': 19624, 'epoch': 3} {'type': 'loss', 'content': 0.062363363802433014, 'timestamp': '2025-09-30 22:36:30.685262', 'step': 19625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:30.717277', 'step': 19625, 'epoch': 3} {'type': 'loss', 'content': 0.05487649887800217, 'timestamp': '2025-09-30 22:36:30.722023', 'step': 19626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.753317', 'step': 19626, 'epoch': 3} {'type': 'loss', 'content': 0.022536596283316612, 'timestamp': '2025-09-30 22:36:30.763021', 'step': 19627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:30.805889', 'step': 19627, 'epoch': 3} {'type': 'loss', 'content': 0.029697192832827568, 'timestamp': '2025-09-30 22:36:30.836303', 'step': 19628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:30.868376', 'step': 19628, 'epoch': 3} {'type': 'loss', 'content': 0.017679091542959213, 'timestamp': '2025-09-30 22:36:30.877106', 'step': 19629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:30.920545', 'step': 19629, 'epoch': 3} {'type': 'loss', 'content': 0.05368352681398392, 'timestamp': '2025-09-30 22:36:30.937320', 'step': 19630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:30.991932', 'step': 19630, 'epoch': 3} {'type': 'loss', 'content': 0.09232836961746216, 'timestamp': '2025-09-30 22:36:30.996150', 'step': 19631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.038448', 'step': 19631, 'epoch': 3} {'type': 'loss', 'content': 0.059530504047870636, 'timestamp': '2025-09-30 22:36:31.062631', 'step': 19632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.110141', 'step': 19632, 'epoch': 3} {'type': 'loss', 'content': 0.09252244979143143, 'timestamp': '2025-09-30 22:36:31.120830', 'step': 19633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:31.152912', 'step': 19633, 'epoch': 3} {'type': 'loss', 'content': 0.23049964010715485, 'timestamp': '2025-09-30 22:36:31.159863', 'step': 19634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:31.197646', 'step': 19634, 'epoch': 3} {'type': 'loss', 'content': 0.15992088615894318, 'timestamp': '2025-09-30 22:36:31.203285', 'step': 19635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.236617', 'step': 19635, 'epoch': 3} {'type': 'loss', 'content': 0.11505736410617828, 'timestamp': '2025-09-30 22:36:31.262297', 'step': 19636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.308515', 'step': 19636, 'epoch': 3} {'type': 'loss', 'content': 0.06861349940299988, 'timestamp': '2025-09-30 22:36:31.312191', 'step': 19637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.352382', 'step': 19637, 'epoch': 3} {'type': 'loss', 'content': 0.03667910397052765, 'timestamp': '2025-09-30 22:36:31.359892', 'step': 19638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:31.403759', 'step': 19638, 'epoch': 3} {'type': 'loss', 'content': 0.06195599213242531, 'timestamp': '2025-09-30 22:36:31.408338', 'step': 19639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:31.451435', 'step': 19639, 'epoch': 3} {'type': 'loss', 'content': 0.021013934165239334, 'timestamp': '2025-09-30 22:36:31.481205', 'step': 19640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.517314', 'step': 19640, 'epoch': 3} {'type': 'loss', 'content': 0.0942961797118187, 'timestamp': '2025-09-30 22:36:31.525839', 'step': 19641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:31.557793', 'step': 19641, 'epoch': 3} {'type': 'loss', 'content': 0.05144825205206871, 'timestamp': '2025-09-30 22:36:31.570238', 'step': 19642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.615167', 'step': 19642, 'epoch': 3} {'type': 'loss', 'content': 0.02047649398446083, 'timestamp': '2025-09-30 22:36:31.618925', 'step': 19643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:31.651159', 'step': 19643, 'epoch': 3} {'type': 'loss', 'content': 0.011115632951259613, 'timestamp': '2025-09-30 22:36:31.688266', 'step': 19644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.722965', 'step': 19644, 'epoch': 3} {'type': 'loss', 'content': 0.03325853496789932, 'timestamp': '2025-09-30 22:36:31.737966', 'step': 19645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:31.770815', 'step': 19645, 'epoch': 3} {'type': 'loss', 'content': 0.1464071422815323, 'timestamp': '2025-09-30 22:36:31.791832', 'step': 19646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:31.830661', 'step': 19646, 'epoch': 3} {'type': 'loss', 'content': 0.09976102411746979, 'timestamp': '2025-09-30 22:36:31.856443', 'step': 19647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:31.899634', 'step': 19647, 'epoch': 3} {'type': 'loss', 'content': 0.1695624589920044, 'timestamp': '2025-09-30 22:36:31.926340', 'step': 19648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:31.956706', 'step': 19648, 'epoch': 3} {'type': 'loss', 'content': 0.022196808829903603, 'timestamp': '2025-09-30 22:36:31.961655', 'step': 19649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.012450', 'step': 19649, 'epoch': 3} {'type': 'loss', 'content': 0.0894501581788063, 'timestamp': '2025-09-30 22:36:32.017510', 'step': 19650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.052114', 'step': 19650, 'epoch': 3} {'type': 'loss', 'content': 0.09849841892719269, 'timestamp': '2025-09-30 22:36:32.058652', 'step': 19651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:32.102359', 'step': 19651, 'epoch': 3} {'type': 'loss', 'content': 0.03387334570288658, 'timestamp': '2025-09-30 22:36:32.131851', 'step': 19652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:32.166526', 'step': 19652, 'epoch': 3} {'type': 'loss', 'content': 0.1121894046664238, 'timestamp': '2025-09-30 22:36:32.174224', 'step': 19653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.214012', 'step': 19653, 'epoch': 3} {'type': 'loss', 'content': 0.03223753347992897, 'timestamp': '2025-09-30 22:36:32.219061', 'step': 19654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:32.257582', 'step': 19654, 'epoch': 3} {'type': 'loss', 'content': 0.12807714939117432, 'timestamp': '2025-09-30 22:36:32.264243', 'step': 19655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.298612', 'step': 19655, 'epoch': 3} {'type': 'loss', 'content': 0.08198433369398117, 'timestamp': '2025-09-30 22:36:32.326225', 'step': 19656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.360333', 'step': 19656, 'epoch': 3} {'type': 'loss', 'content': 0.03223983198404312, 'timestamp': '2025-09-30 22:36:32.363383', 'step': 19657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:32.394524', 'step': 19657, 'epoch': 3} {'type': 'loss', 'content': 0.01953021064400673, 'timestamp': '2025-09-30 22:36:32.397323', 'step': 19658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.428382', 'step': 19658, 'epoch': 3} {'type': 'loss', 'content': 0.026120861992239952, 'timestamp': '2025-09-30 22:36:32.432772', 'step': 19659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:32.482568', 'step': 19659, 'epoch': 3} {'type': 'loss', 'content': 0.05278497934341431, 'timestamp': '2025-09-30 22:36:32.508867', 'step': 19660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:32.539160', 'step': 19660, 'epoch': 3} {'type': 'loss', 'content': 0.0975259467959404, 'timestamp': '2025-09-30 22:36:32.545503', 'step': 19661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.578643', 'step': 19661, 'epoch': 3} {'type': 'loss', 'content': 0.028567438945174217, 'timestamp': '2025-09-30 22:36:32.581126', 'step': 19662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.611589', 'step': 19662, 'epoch': 3} {'type': 'loss', 'content': 0.09726709127426147, 'timestamp': '2025-09-30 22:36:32.614452', 'step': 19663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:32.654823', 'step': 19663, 'epoch': 3} {'type': 'loss', 'content': 0.00886089913547039, 'timestamp': '2025-09-30 22:36:32.681486', 'step': 19664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:32.714884', 'step': 19664, 'epoch': 3} {'type': 'loss', 'content': 0.035083696246147156, 'timestamp': '2025-09-30 22:36:32.719424', 'step': 19665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.750833', 'step': 19665, 'epoch': 3} {'type': 'loss', 'content': 0.2248893827199936, 'timestamp': '2025-09-30 22:36:32.756450', 'step': 19666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.786966', 'step': 19666, 'epoch': 3} {'type': 'loss', 'content': 0.08068613708019257, 'timestamp': '2025-09-30 22:36:32.793467', 'step': 19667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:32.825622', 'step': 19667, 'epoch': 3} {'type': 'loss', 'content': 0.03996948525309563, 'timestamp': '2025-09-30 22:36:32.851101', 'step': 19668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:32.884168', 'step': 19668, 'epoch': 3} {'type': 'loss', 'content': 0.05206446722149849, 'timestamp': '2025-09-30 22:36:32.894042', 'step': 19669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:32.928041', 'step': 19669, 'epoch': 3} {'type': 'loss', 'content': 0.009642589837312698, 'timestamp': '2025-09-30 22:36:32.934081', 'step': 19670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:32.971885', 'step': 19670, 'epoch': 3} {'type': 'loss', 'content': 0.12359901517629623, 'timestamp': '2025-09-30 22:36:32.976514', 'step': 19671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:33.007478', 'step': 19671, 'epoch': 3} {'type': 'loss', 'content': 0.12384787201881409, 'timestamp': '2025-09-30 22:36:33.039160', 'step': 19672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.075414', 'step': 19672, 'epoch': 3} {'type': 'loss', 'content': 0.12896619737148285, 'timestamp': '2025-09-30 22:36:33.079956', 'step': 19673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.114159', 'step': 19673, 'epoch': 3} {'type': 'loss', 'content': 0.11787465214729309, 'timestamp': '2025-09-30 22:36:33.117397', 'step': 19674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.148809', 'step': 19674, 'epoch': 3} {'type': 'loss', 'content': 0.10493332147598267, 'timestamp': '2025-09-30 22:36:33.153484', 'step': 19675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.184830', 'step': 19675, 'epoch': 3} {'type': 'loss', 'content': 0.06319740414619446, 'timestamp': '2025-09-30 22:36:33.210113', 'step': 19676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.242170', 'step': 19676, 'epoch': 3} {'type': 'loss', 'content': 0.03177434206008911, 'timestamp': '2025-09-30 22:36:33.249355', 'step': 19677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:33.282078', 'step': 19677, 'epoch': 3} {'type': 'loss', 'content': 0.07719644159078598, 'timestamp': '2025-09-30 22:36:33.284321', 'step': 19678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:33.317325', 'step': 19678, 'epoch': 3} {'type': 'loss', 'content': 0.050902485847473145, 'timestamp': '2025-09-30 22:36:33.322393', 'step': 19679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.353275', 'step': 19679, 'epoch': 3} {'type': 'loss', 'content': 0.08016424626111984, 'timestamp': '2025-09-30 22:36:33.380693', 'step': 19680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.417022', 'step': 19680, 'epoch': 3} {'type': 'loss', 'content': 0.11057566106319427, 'timestamp': '2025-09-30 22:36:33.420389', 'step': 19681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.452299', 'step': 19681, 'epoch': 3} {'type': 'loss', 'content': 0.01642512157559395, 'timestamp': '2025-09-30 22:36:33.457250', 'step': 19682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.489231', 'step': 19682, 'epoch': 3} {'type': 'loss', 'content': 0.013290771283209324, 'timestamp': '2025-09-30 22:36:33.493367', 'step': 19683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:33.529869', 'step': 19683, 'epoch': 3} {'type': 'loss', 'content': 0.08785023540258408, 'timestamp': '2025-09-30 22:36:33.554165', 'step': 19684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.585896', 'step': 19684, 'epoch': 3} {'type': 'loss', 'content': 0.13409268856048584, 'timestamp': '2025-09-30 22:36:33.591900', 'step': 19685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:33.625165', 'step': 19685, 'epoch': 3} {'type': 'loss', 'content': 0.11017318069934845, 'timestamp': '2025-09-30 22:36:33.629544', 'step': 19686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.662588', 'step': 19686, 'epoch': 3} {'type': 'loss', 'content': 0.018679209053516388, 'timestamp': '2025-09-30 22:36:33.665822', 'step': 19687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:33.699411', 'step': 19687, 'epoch': 3} {'type': 'loss', 'content': 0.0815819501876831, 'timestamp': '2025-09-30 22:36:33.725352', 'step': 19688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.756736', 'step': 19688, 'epoch': 3} {'type': 'loss', 'content': 0.05809246748685837, 'timestamp': '2025-09-30 22:36:33.761251', 'step': 19689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:33.794520', 'step': 19689, 'epoch': 3} {'type': 'loss', 'content': 0.08539845794439316, 'timestamp': '2025-09-30 22:36:33.798458', 'step': 19690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.830469', 'step': 19690, 'epoch': 3} {'type': 'loss', 'content': 0.030517293140292168, 'timestamp': '2025-09-30 22:36:33.835039', 'step': 19691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.869308', 'step': 19691, 'epoch': 3} {'type': 'loss', 'content': 0.03003840707242489, 'timestamp': '2025-09-30 22:36:33.896811', 'step': 19692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:33.928980', 'step': 19692, 'epoch': 3} {'type': 'loss', 'content': 0.11650675535202026, 'timestamp': '2025-09-30 22:36:33.933616', 'step': 19693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:33.976840', 'step': 19693, 'epoch': 3} {'type': 'loss', 'content': 0.09758136421442032, 'timestamp': '2025-09-30 22:36:33.980233', 'step': 19694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:34.021087', 'step': 19694, 'epoch': 3} {'type': 'loss', 'content': 0.10867354273796082, 'timestamp': '2025-09-30 22:36:34.023880', 'step': 19695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.054841', 'step': 19695, 'epoch': 3} {'type': 'loss', 'content': 0.02514408715069294, 'timestamp': '2025-09-30 22:36:34.079861', 'step': 19696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:34.112507', 'step': 19696, 'epoch': 3} {'type': 'loss', 'content': 0.10406539589166641, 'timestamp': '2025-09-30 22:36:34.116501', 'step': 19697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:34.147434', 'step': 19697, 'epoch': 3} {'type': 'loss', 'content': 0.059450551867485046, 'timestamp': '2025-09-30 22:36:34.150545', 'step': 19698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.187387', 'step': 19698, 'epoch': 3} {'type': 'loss', 'content': 0.0917113870382309, 'timestamp': '2025-09-30 22:36:34.200550', 'step': 19699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:34.234181', 'step': 19699, 'epoch': 3} {'type': 'loss', 'content': 0.020282359793782234, 'timestamp': '2025-09-30 22:36:34.268959', 'step': 19700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:34.299934', 'step': 19700, 'epoch': 3} {'type': 'loss', 'content': 0.10312886536121368, 'timestamp': '2025-09-30 22:36:34.303393', 'step': 19701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:34.335692', 'step': 19701, 'epoch': 3} {'type': 'loss', 'content': 0.11664451658725739, 'timestamp': '2025-09-30 22:36:34.339854', 'step': 19702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:34.371630', 'step': 19702, 'epoch': 3} {'type': 'loss', 'content': 0.02435184083878994, 'timestamp': '2025-09-30 22:36:34.375579', 'step': 19703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.405881', 'step': 19703, 'epoch': 3} {'type': 'loss', 'content': 0.16311706602573395, 'timestamp': '2025-09-30 22:36:34.430897', 'step': 19704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:34.462451', 'step': 19704, 'epoch': 3} {'type': 'loss', 'content': 0.06473804265260696, 'timestamp': '2025-09-30 22:36:34.469024', 'step': 19705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.502376', 'step': 19705, 'epoch': 3} {'type': 'loss', 'content': 0.02168755792081356, 'timestamp': '2025-09-30 22:36:34.508115', 'step': 19706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.541261', 'step': 19706, 'epoch': 3} {'type': 'loss', 'content': 0.016028570011258125, 'timestamp': '2025-09-30 22:36:34.544399', 'step': 19707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.575810', 'step': 19707, 'epoch': 3} {'type': 'loss', 'content': 0.028994766995310783, 'timestamp': '2025-09-30 22:36:34.600633', 'step': 19708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:34.644841', 'step': 19708, 'epoch': 3} {'type': 'loss', 'content': 0.037050772458314896, 'timestamp': '2025-09-30 22:36:34.652638', 'step': 19709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.691943', 'step': 19709, 'epoch': 3} {'type': 'loss', 'content': 0.08161389082670212, 'timestamp': '2025-09-30 22:36:34.716444', 'step': 19710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.769543', 'step': 19710, 'epoch': 3} {'type': 'loss', 'content': 0.047884438186883926, 'timestamp': '2025-09-30 22:36:34.790309', 'step': 19711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:34.833334', 'step': 19711, 'epoch': 3} {'type': 'loss', 'content': 0.14303846657276154, 'timestamp': '2025-09-30 22:36:34.880734', 'step': 19712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:34.923622', 'step': 19712, 'epoch': 3} {'type': 'loss', 'content': 0.12374170124530792, 'timestamp': '2025-09-30 22:36:34.933479', 'step': 19713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:34.974922', 'step': 19713, 'epoch': 3} {'type': 'loss', 'content': 0.06463885307312012, 'timestamp': '2025-09-30 22:36:34.979539', 'step': 19714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:35.019105', 'step': 19714, 'epoch': 3} {'type': 'loss', 'content': 0.057687025517225266, 'timestamp': '2025-09-30 22:36:35.026726', 'step': 19715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.066304', 'step': 19715, 'epoch': 3} {'type': 'loss', 'content': 0.060043156147003174, 'timestamp': '2025-09-30 22:36:35.099213', 'step': 19716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:35.134365', 'step': 19716, 'epoch': 3} {'type': 'loss', 'content': 0.09639374911785126, 'timestamp': '2025-09-30 22:36:35.138661', 'step': 19717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.169996', 'step': 19717, 'epoch': 3} {'type': 'loss', 'content': 0.1192980483174324, 'timestamp': '2025-09-30 22:36:35.173489', 'step': 19718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.229229', 'step': 19718, 'epoch': 3} {'type': 'loss', 'content': 0.058251455426216125, 'timestamp': '2025-09-30 22:36:35.233049', 'step': 19719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.265805', 'step': 19719, 'epoch': 3} {'type': 'loss', 'content': 0.08648666739463806, 'timestamp': '2025-09-30 22:36:35.291222', 'step': 19720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:35.324516', 'step': 19720, 'epoch': 3} {'type': 'loss', 'content': 0.01963556744158268, 'timestamp': '2025-09-30 22:36:35.327830', 'step': 19721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.365691', 'step': 19721, 'epoch': 3} {'type': 'loss', 'content': 0.08767548203468323, 'timestamp': '2025-09-30 22:36:35.369451', 'step': 19722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.409776', 'step': 19722, 'epoch': 3} {'type': 'loss', 'content': 0.10234055668115616, 'timestamp': '2025-09-30 22:36:35.420261', 'step': 19723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:35.452260', 'step': 19723, 'epoch': 3} {'type': 'loss', 'content': 0.08881103992462158, 'timestamp': '2025-09-30 22:36:35.477077', 'step': 19724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.508555', 'step': 19724, 'epoch': 3} {'type': 'loss', 'content': 0.13395510613918304, 'timestamp': '2025-09-30 22:36:35.512732', 'step': 19725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.549463', 'step': 19725, 'epoch': 3} {'type': 'loss', 'content': 0.06699363142251968, 'timestamp': '2025-09-30 22:36:35.553776', 'step': 19726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.591154', 'step': 19726, 'epoch': 3} {'type': 'loss', 'content': 0.050559140741825104, 'timestamp': '2025-09-30 22:36:35.595254', 'step': 19727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:35.627822', 'step': 19727, 'epoch': 3} {'type': 'loss', 'content': 0.0604793056845665, 'timestamp': '2025-09-30 22:36:35.653188', 'step': 19728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:35.684061', 'step': 19728, 'epoch': 3} {'type': 'loss', 'content': 0.04469306021928787, 'timestamp': '2025-09-30 22:36:35.690578', 'step': 19729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:35.723486', 'step': 19729, 'epoch': 3} {'type': 'loss', 'content': 0.1060812845826149, 'timestamp': '2025-09-30 22:36:35.729358', 'step': 19730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.760264', 'step': 19730, 'epoch': 3} {'type': 'loss', 'content': 0.02352852001786232, 'timestamp': '2025-09-30 22:36:35.766360', 'step': 19731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.797271', 'step': 19731, 'epoch': 3} {'type': 'loss', 'content': 0.0671156570315361, 'timestamp': '2025-09-30 22:36:35.829579', 'step': 19732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.867323', 'step': 19732, 'epoch': 3} {'type': 'loss', 'content': 0.12111101299524307, 'timestamp': '2025-09-30 22:36:35.874573', 'step': 19733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.909275', 'step': 19733, 'epoch': 3} {'type': 'loss', 'content': 0.048122700303792953, 'timestamp': '2025-09-30 22:36:35.912963', 'step': 19734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:35.945480', 'step': 19734, 'epoch': 3} {'type': 'loss', 'content': 0.05047006905078888, 'timestamp': '2025-09-30 22:36:35.949314', 'step': 19735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:35.980786', 'step': 19735, 'epoch': 3} {'type': 'loss', 'content': 0.04364296421408653, 'timestamp': '2025-09-30 22:36:36.007796', 'step': 19736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:36.039760', 'step': 19736, 'epoch': 3} {'type': 'loss', 'content': 0.0559418722987175, 'timestamp': '2025-09-30 22:36:36.042839', 'step': 19737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:36.073726', 'step': 19737, 'epoch': 3} {'type': 'loss', 'content': 0.03411797434091568, 'timestamp': '2025-09-30 22:36:36.088934', 'step': 19738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:36.121265', 'step': 19738, 'epoch': 3} {'type': 'loss', 'content': 0.04173967242240906, 'timestamp': '2025-09-30 22:36:36.123881', 'step': 19739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:36.156236', 'step': 19739, 'epoch': 3} {'type': 'loss', 'content': 0.02910509891808033, 'timestamp': '2025-09-30 22:36:36.180537', 'step': 19740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:36.210101', 'step': 19740, 'epoch': 3} {'type': 'loss', 'content': 0.05914893001317978, 'timestamp': '2025-09-30 22:36:36.222885', 'step': 19741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:36.255911', 'step': 19741, 'epoch': 3} {'type': 'loss', 'content': 0.1381194293498993, 'timestamp': '2025-09-30 22:36:36.264239', 'step': 19742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:36.300008', 'step': 19742, 'epoch': 3} {'type': 'loss', 'content': 0.0034198348876088858, 'timestamp': '2025-09-30 22:36:36.304149', 'step': 19743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:36.334332', 'step': 19743, 'epoch': 3} {'type': 'loss', 'content': 0.02790902554988861, 'timestamp': '2025-09-30 22:36:36.365563', 'step': 19744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:36.398606', 'step': 19744, 'epoch': 3} {'type': 'loss', 'content': 0.04297805577516556, 'timestamp': '2025-09-30 22:36:36.403872', 'step': 19745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:36.450629', 'step': 19745, 'epoch': 3} {'type': 'loss', 'content': 0.05463217943906784, 'timestamp': '2025-09-30 22:36:36.452897', 'step': 19746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:36.486650', 'step': 19746, 'epoch': 3} {'type': 'loss', 'content': 0.00854638870805502, 'timestamp': '2025-09-30 22:36:36.501953', 'step': 19747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:36.534977', 'step': 19747, 'epoch': 3} {'type': 'loss', 'content': 0.022458702325820923, 'timestamp': '2025-09-30 22:36:36.561526', 'step': 19748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:36.600796', 'step': 19748, 'epoch': 3} {'type': 'loss', 'content': 0.07804230600595474, 'timestamp': '2025-09-30 22:36:36.603782', 'step': 19749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:36.642047', 'step': 19749, 'epoch': 3} {'type': 'loss', 'content': 0.0534777007997036, 'timestamp': '2025-09-30 22:36:36.654850', 'step': 19750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:36.695475', 'step': 19750, 'epoch': 3} {'type': 'loss', 'content': 0.08566199243068695, 'timestamp': '2025-09-30 22:36:36.712547', 'step': 19751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:36.753445', 'step': 19751, 'epoch': 3} {'type': 'loss', 'content': 0.04565610736608505, 'timestamp': '2025-09-30 22:36:36.780380', 'step': 19752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:36.811752', 'step': 19752, 'epoch': 3} {'type': 'loss', 'content': 0.01890343613922596, 'timestamp': '2025-09-30 22:36:36.825003', 'step': 19753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:36.862677', 'step': 19753, 'epoch': 3} {'type': 'loss', 'content': 0.08810573071241379, 'timestamp': '2025-09-30 22:36:36.875617', 'step': 19754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:36.909793', 'step': 19754, 'epoch': 3} {'type': 'loss', 'content': 0.033473942428827286, 'timestamp': '2025-09-30 22:36:36.924945', 'step': 19755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:36.957396', 'step': 19755, 'epoch': 3} {'type': 'loss', 'content': 0.08843563497066498, 'timestamp': '2025-09-30 22:36:36.989474', 'step': 19756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:37.023102', 'step': 19756, 'epoch': 3} {'type': 'loss', 'content': 0.015138589777052402, 'timestamp': '2025-09-30 22:36:37.039535', 'step': 19757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:37.072402', 'step': 19757, 'epoch': 3} {'type': 'loss', 'content': 0.019902097061276436, 'timestamp': '2025-09-30 22:36:37.086609', 'step': 19758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:37.122020', 'step': 19758, 'epoch': 3} {'type': 'loss', 'content': 0.11083749681711197, 'timestamp': '2025-09-30 22:36:37.138990', 'step': 19759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:37.174777', 'step': 19759, 'epoch': 3} {'type': 'loss', 'content': 0.03882209584116936, 'timestamp': '2025-09-30 22:36:37.216100', 'step': 19760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:37.249243', 'step': 19760, 'epoch': 3} {'type': 'loss', 'content': 0.08037837594747543, 'timestamp': '2025-09-30 22:36:37.261154', 'step': 19761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:37.321769', 'step': 19761, 'epoch': 3} {'type': 'loss', 'content': 0.05971261486411095, 'timestamp': '2025-09-30 22:36:37.340331', 'step': 19762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:37.382985', 'step': 19762, 'epoch': 3} {'type': 'loss', 'content': 0.057329606264829636, 'timestamp': '2025-09-30 22:36:37.386054', 'step': 19763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:37.422315', 'step': 19763, 'epoch': 3} {'type': 'loss', 'content': 0.06139274314045906, 'timestamp': '2025-09-30 22:36:37.453974', 'step': 19764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:37.486473', 'step': 19764, 'epoch': 3} {'type': 'loss', 'content': 0.09702499955892563, 'timestamp': '2025-09-30 22:36:37.496738', 'step': 19765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:37.530268', 'step': 19765, 'epoch': 3} {'type': 'loss', 'content': 0.031654417514801025, 'timestamp': '2025-09-30 22:36:37.538004', 'step': 19766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:37.570946', 'step': 19766, 'epoch': 3} {'type': 'loss', 'content': 0.06053445488214493, 'timestamp': '2025-09-30 22:36:37.575714', 'step': 19767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:37.608783', 'step': 19767, 'epoch': 3} {'type': 'loss', 'content': 0.07078219950199127, 'timestamp': '2025-09-30 22:36:37.635185', 'step': 19768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:37.668463', 'step': 19768, 'epoch': 3} {'type': 'loss', 'content': 0.07855601608753204, 'timestamp': '2025-09-30 22:36:37.686040', 'step': 19769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:37.721944', 'step': 19769, 'epoch': 3} {'type': 'loss', 'content': 0.06151220202445984, 'timestamp': '2025-09-30 22:36:37.737025', 'step': 19770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:37.785160', 'step': 19770, 'epoch': 3} {'type': 'loss', 'content': 0.09169745445251465, 'timestamp': '2025-09-30 22:36:37.789596', 'step': 19771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:37.833552', 'step': 19771, 'epoch': 3} {'type': 'loss', 'content': 0.0827040821313858, 'timestamp': '2025-09-30 22:36:37.864807', 'step': 19772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:37.907006', 'step': 19772, 'epoch': 3} {'type': 'loss', 'content': 0.04839343577623367, 'timestamp': '2025-09-30 22:36:37.911286', 'step': 19773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:37.960639', 'step': 19773, 'epoch': 3} {'type': 'loss', 'content': 0.04785335808992386, 'timestamp': '2025-09-30 22:36:37.964208', 'step': 19774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:38.004154', 'step': 19774, 'epoch': 3} {'type': 'loss', 'content': 0.06736566126346588, 'timestamp': '2025-09-30 22:36:38.016966', 'step': 19775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:38.054099', 'step': 19775, 'epoch': 3} {'type': 'loss', 'content': 0.09791948646306992, 'timestamp': '2025-09-30 22:36:38.090377', 'step': 19776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:38.122376', 'step': 19776, 'epoch': 3} {'type': 'loss', 'content': 0.022430960088968277, 'timestamp': '2025-09-30 22:36:38.139248', 'step': 19777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:38.171116', 'step': 19777, 'epoch': 3} {'type': 'loss', 'content': 0.06947096437215805, 'timestamp': '2025-09-30 22:36:38.195964', 'step': 19778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:38.232787', 'step': 19778, 'epoch': 3} {'type': 'loss', 'content': 0.07933132350444794, 'timestamp': '2025-09-30 22:36:38.237774', 'step': 19779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:38.285302', 'step': 19779, 'epoch': 3} {'type': 'loss', 'content': 0.05823315307497978, 'timestamp': '2025-09-30 22:36:38.310378', 'step': 19780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:38.354221', 'step': 19780, 'epoch': 3} {'type': 'loss', 'content': 0.04473840817809105, 'timestamp': '2025-09-30 22:36:38.365455', 'step': 19781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:38.408605', 'step': 19781, 'epoch': 3} {'type': 'loss', 'content': 0.03699138015508652, 'timestamp': '2025-09-30 22:36:38.416124', 'step': 19782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:38.450315', 'step': 19782, 'epoch': 3} {'type': 'loss', 'content': 0.1020742654800415, 'timestamp': '2025-09-30 22:36:38.465830', 'step': 19783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:38.529549', 'step': 19783, 'epoch': 3} {'type': 'loss', 'content': 0.021555034443736076, 'timestamp': '2025-09-30 22:36:38.561778', 'step': 19784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:38.595466', 'step': 19784, 'epoch': 3} {'type': 'loss', 'content': 0.03446215018630028, 'timestamp': '2025-09-30 22:36:38.602259', 'step': 19785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:36:38.642159', 'step': 19785, 'epoch': 3} {'type': 'loss', 'content': 0.12059880793094635, 'timestamp': '2025-09-30 22:36:38.651941', 'step': 19786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:38.692923', 'step': 19786, 'epoch': 3} {'type': 'loss', 'content': 0.048714205622673035, 'timestamp': '2025-09-30 22:36:38.698161', 'step': 19787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:38.731944', 'step': 19787, 'epoch': 3} {'type': 'loss', 'content': 0.08059179037809372, 'timestamp': '2025-09-30 22:36:38.756768', 'step': 19788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:38.789103', 'step': 19788, 'epoch': 3} {'type': 'loss', 'content': 0.11449892818927765, 'timestamp': '2025-09-30 22:36:38.792559', 'step': 19789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:38.824854', 'step': 19789, 'epoch': 3} {'type': 'loss', 'content': 0.08474648743867874, 'timestamp': '2025-09-30 22:36:38.828114', 'step': 19790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:38.859872', 'step': 19790, 'epoch': 3} {'type': 'loss', 'content': 0.04441800341010094, 'timestamp': '2025-09-30 22:36:38.865886', 'step': 19791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:38.908824', 'step': 19791, 'epoch': 3} {'type': 'loss', 'content': 0.038160454481840134, 'timestamp': '2025-09-30 22:36:38.933688', 'step': 19792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:38.969287', 'step': 19792, 'epoch': 3} {'type': 'loss', 'content': 0.05481985583901405, 'timestamp': '2025-09-30 22:36:38.981021', 'step': 19793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.012103', 'step': 19793, 'epoch': 3} {'type': 'loss', 'content': 0.10004713386297226, 'timestamp': '2025-09-30 22:36:39.025633', 'step': 19794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.074290', 'step': 19794, 'epoch': 3} {'type': 'loss', 'content': 0.05427546054124832, 'timestamp': '2025-09-30 22:36:39.078362', 'step': 19795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:39.111714', 'step': 19795, 'epoch': 3} {'type': 'loss', 'content': 0.06031661108136177, 'timestamp': '2025-09-30 22:36:39.136099', 'step': 19796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:39.169166', 'step': 19796, 'epoch': 3} {'type': 'loss', 'content': 0.0829848200082779, 'timestamp': '2025-09-30 22:36:39.179516', 'step': 19797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:39.218592', 'step': 19797, 'epoch': 3} {'type': 'loss', 'content': 0.04663672298192978, 'timestamp': '2025-09-30 22:36:39.222489', 'step': 19798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.258527', 'step': 19798, 'epoch': 3} {'type': 'loss', 'content': 0.08040480315685272, 'timestamp': '2025-09-30 22:36:39.263372', 'step': 19799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.306719', 'step': 19799, 'epoch': 3} {'type': 'loss', 'content': 0.04949228838086128, 'timestamp': '2025-09-30 22:36:39.333140', 'step': 19800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.370448', 'step': 19800, 'epoch': 3} {'type': 'loss', 'content': 0.13221389055252075, 'timestamp': '2025-09-30 22:36:39.375084', 'step': 19801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.412551', 'step': 19801, 'epoch': 3} {'type': 'loss', 'content': 0.052803169935941696, 'timestamp': '2025-09-30 22:36:39.426131', 'step': 19802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:39.458769', 'step': 19802, 'epoch': 3} {'type': 'loss', 'content': 0.023034492507576942, 'timestamp': '2025-09-30 22:36:39.463904', 'step': 19803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:39.503117', 'step': 19803, 'epoch': 3} {'type': 'loss', 'content': 0.08173145353794098, 'timestamp': '2025-09-30 22:36:39.533684', 'step': 19804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:39.565155', 'step': 19804, 'epoch': 3} {'type': 'loss', 'content': 0.0656774565577507, 'timestamp': '2025-09-30 22:36:39.568898', 'step': 19805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:39.613703', 'step': 19805, 'epoch': 3} {'type': 'loss', 'content': 0.06776070594787598, 'timestamp': '2025-09-30 22:36:39.616816', 'step': 19806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:39.660696', 'step': 19806, 'epoch': 3} {'type': 'loss', 'content': 0.05715206265449524, 'timestamp': '2025-09-30 22:36:39.665258', 'step': 19807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:39.697160', 'step': 19807, 'epoch': 3} {'type': 'loss', 'content': 0.061865657567977905, 'timestamp': '2025-09-30 22:36:39.730301', 'step': 19808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:39.763733', 'step': 19808, 'epoch': 3} {'type': 'loss', 'content': 0.1275770217180252, 'timestamp': '2025-09-30 22:36:39.776875', 'step': 19809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.824730', 'step': 19809, 'epoch': 3} {'type': 'loss', 'content': 0.058958690613508224, 'timestamp': '2025-09-30 22:36:39.833115', 'step': 19810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:39.877779', 'step': 19810, 'epoch': 3} {'type': 'loss', 'content': 0.08428625762462616, 'timestamp': '2025-09-30 22:36:39.887226', 'step': 19811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:39.920986', 'step': 19811, 'epoch': 3} {'type': 'loss', 'content': 0.032431650906801224, 'timestamp': '2025-09-30 22:36:39.946346', 'step': 19812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:39.988817', 'step': 19812, 'epoch': 3} {'type': 'loss', 'content': 0.16197985410690308, 'timestamp': '2025-09-30 22:36:39.993002', 'step': 19813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:40.023964', 'step': 19813, 'epoch': 3} {'type': 'loss', 'content': 0.10136480629444122, 'timestamp': '2025-09-30 22:36:40.028618', 'step': 19814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:40.060209', 'step': 19814, 'epoch': 3} {'type': 'loss', 'content': 0.09544743597507477, 'timestamp': '2025-09-30 22:36:40.070610', 'step': 19815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.102608', 'step': 19815, 'epoch': 3} {'type': 'loss', 'content': 0.0246301107108593, 'timestamp': '2025-09-30 22:36:40.127747', 'step': 19816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:40.160961', 'step': 19816, 'epoch': 3} {'type': 'loss', 'content': 0.05490630120038986, 'timestamp': '2025-09-30 22:36:40.164240', 'step': 19817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.196757', 'step': 19817, 'epoch': 3} {'type': 'loss', 'content': 0.05946812033653259, 'timestamp': '2025-09-30 22:36:40.204849', 'step': 19818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:40.242061', 'step': 19818, 'epoch': 3} {'type': 'loss', 'content': 0.13166046142578125, 'timestamp': '2025-09-30 22:36:40.247945', 'step': 19819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:40.282889', 'step': 19819, 'epoch': 3} {'type': 'loss', 'content': 0.10679508000612259, 'timestamp': '2025-09-30 22:36:40.307854', 'step': 19820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:40.361308', 'step': 19820, 'epoch': 3} {'type': 'loss', 'content': 0.04283767193555832, 'timestamp': '2025-09-30 22:36:40.364225', 'step': 19821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:40.394773', 'step': 19821, 'epoch': 3} {'type': 'loss', 'content': 0.09118068218231201, 'timestamp': '2025-09-30 22:36:40.400835', 'step': 19822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:40.435523', 'step': 19822, 'epoch': 3} {'type': 'loss', 'content': 0.024329539388418198, 'timestamp': '2025-09-30 22:36:40.439273', 'step': 19823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.472159', 'step': 19823, 'epoch': 3} {'type': 'loss', 'content': 0.03028447926044464, 'timestamp': '2025-09-30 22:36:40.496770', 'step': 19824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.528157', 'step': 19824, 'epoch': 3} {'type': 'loss', 'content': 0.033379729837179184, 'timestamp': '2025-09-30 22:36:40.530524', 'step': 19825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:40.560453', 'step': 19825, 'epoch': 3} {'type': 'loss', 'content': 0.051137518137693405, 'timestamp': '2025-09-30 22:36:40.562735', 'step': 19826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.596626', 'step': 19826, 'epoch': 3} {'type': 'loss', 'content': 0.063973069190979, 'timestamp': '2025-09-30 22:36:40.600642', 'step': 19827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:40.636192', 'step': 19827, 'epoch': 3} {'type': 'loss', 'content': 0.06854677945375443, 'timestamp': '2025-09-30 22:36:40.660880', 'step': 19828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:40.692556', 'step': 19828, 'epoch': 3} {'type': 'loss', 'content': 0.03152573108673096, 'timestamp': '2025-09-30 22:36:40.697423', 'step': 19829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:40.729219', 'step': 19829, 'epoch': 3} {'type': 'loss', 'content': 0.06261978298425674, 'timestamp': '2025-09-30 22:36:40.732975', 'step': 19830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.764387', 'step': 19830, 'epoch': 3} {'type': 'loss', 'content': 0.07284463196992874, 'timestamp': '2025-09-30 22:36:40.767057', 'step': 19831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.800141', 'step': 19831, 'epoch': 3} {'type': 'loss', 'content': 0.09332960844039917, 'timestamp': '2025-09-30 22:36:40.825118', 'step': 19832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:40.862497', 'step': 19832, 'epoch': 3} {'type': 'loss', 'content': 0.03666510060429573, 'timestamp': '2025-09-30 22:36:40.872019', 'step': 19833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.908552', 'step': 19833, 'epoch': 3} {'type': 'loss', 'content': 0.21581588685512543, 'timestamp': '2025-09-30 22:36:40.917715', 'step': 19834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:40.956646', 'step': 19834, 'epoch': 3} {'type': 'loss', 'content': 0.04823792725801468, 'timestamp': '2025-09-30 22:36:40.961040', 'step': 19835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:40.992718', 'step': 19835, 'epoch': 3} {'type': 'loss', 'content': 0.03952515497803688, 'timestamp': '2025-09-30 22:36:41.016873', 'step': 19836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.046039', 'step': 19836, 'epoch': 3} {'type': 'loss', 'content': 0.1071028858423233, 'timestamp': '2025-09-30 22:36:41.048882', 'step': 19837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.091156', 'step': 19837, 'epoch': 3} {'type': 'loss', 'content': 0.050360433757305145, 'timestamp': '2025-09-30 22:36:41.094583', 'step': 19838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:41.125285', 'step': 19838, 'epoch': 3} {'type': 'loss', 'content': 0.08130557835102081, 'timestamp': '2025-09-30 22:36:41.128529', 'step': 19839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.158632', 'step': 19839, 'epoch': 3} {'type': 'loss', 'content': 0.09759521484375, 'timestamp': '2025-09-30 22:36:41.183602', 'step': 19840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.215102', 'step': 19840, 'epoch': 3} {'type': 'loss', 'content': 0.10488396883010864, 'timestamp': '2025-09-30 22:36:41.222342', 'step': 19841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.255030', 'step': 19841, 'epoch': 3} {'type': 'loss', 'content': 0.043033894151449203, 'timestamp': '2025-09-30 22:36:41.261269', 'step': 19842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.299783', 'step': 19842, 'epoch': 3} {'type': 'loss', 'content': 0.09096673130989075, 'timestamp': '2025-09-30 22:36:41.304244', 'step': 19843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:41.335690', 'step': 19843, 'epoch': 3} {'type': 'loss', 'content': 0.033173758536577225, 'timestamp': '2025-09-30 22:36:41.363295', 'step': 19844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:41.396120', 'step': 19844, 'epoch': 3} {'type': 'loss', 'content': 0.0990884006023407, 'timestamp': '2025-09-30 22:36:41.398767', 'step': 19845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.432102', 'step': 19845, 'epoch': 3} {'type': 'loss', 'content': 0.09966680407524109, 'timestamp': '2025-09-30 22:36:41.435647', 'step': 19846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:41.468612', 'step': 19846, 'epoch': 3} {'type': 'loss', 'content': 0.1427885890007019, 'timestamp': '2025-09-30 22:36:41.472003', 'step': 19847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:41.506947', 'step': 19847, 'epoch': 3} {'type': 'loss', 'content': 0.1365176886320114, 'timestamp': '2025-09-30 22:36:41.533560', 'step': 19848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.569937', 'step': 19848, 'epoch': 3} {'type': 'loss', 'content': 0.050092410296201706, 'timestamp': '2025-09-30 22:36:41.581694', 'step': 19849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:41.623931', 'step': 19849, 'epoch': 3} {'type': 'loss', 'content': 0.12533307075500488, 'timestamp': '2025-09-30 22:36:41.636250', 'step': 19850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:41.672454', 'step': 19850, 'epoch': 3} {'type': 'loss', 'content': 0.07205096632242203, 'timestamp': '2025-09-30 22:36:41.677494', 'step': 19851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:41.711271', 'step': 19851, 'epoch': 3} {'type': 'loss', 'content': 0.08536330610513687, 'timestamp': '2025-09-30 22:36:41.737175', 'step': 19852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.775390', 'step': 19852, 'epoch': 3} {'type': 'loss', 'content': 0.024199267849326134, 'timestamp': '2025-09-30 22:36:41.778947', 'step': 19853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.811943', 'step': 19853, 'epoch': 3} {'type': 'loss', 'content': 0.11889529973268509, 'timestamp': '2025-09-30 22:36:41.815954', 'step': 19854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.852479', 'step': 19854, 'epoch': 3} {'type': 'loss', 'content': 0.07942494004964828, 'timestamp': '2025-09-30 22:36:41.855309', 'step': 19855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.887195', 'step': 19855, 'epoch': 3} {'type': 'loss', 'content': 0.04457804560661316, 'timestamp': '2025-09-30 22:36:41.911883', 'step': 19856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:41.951647', 'step': 19856, 'epoch': 3} {'type': 'loss', 'content': 0.12564857304096222, 'timestamp': '2025-09-30 22:36:41.956364', 'step': 19857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:41.996773', 'step': 19857, 'epoch': 3} {'type': 'loss', 'content': 0.07482385635375977, 'timestamp': '2025-09-30 22:36:41.999854', 'step': 19858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:42.034001', 'step': 19858, 'epoch': 3} {'type': 'loss', 'content': 0.08904530853033066, 'timestamp': '2025-09-30 22:36:42.039573', 'step': 19859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:42.070555', 'step': 19859, 'epoch': 3} {'type': 'loss', 'content': 0.06059059128165245, 'timestamp': '2025-09-30 22:36:42.095262', 'step': 19860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:42.128348', 'step': 19860, 'epoch': 3} {'type': 'loss', 'content': 0.06254466623067856, 'timestamp': '2025-09-30 22:36:42.142184', 'step': 19861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:42.174728', 'step': 19861, 'epoch': 3} {'type': 'loss', 'content': 0.08939210325479507, 'timestamp': '2025-09-30 22:36:42.177318', 'step': 19862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:42.220093', 'step': 19862, 'epoch': 3} {'type': 'loss', 'content': 0.05640532821416855, 'timestamp': '2025-09-30 22:36:42.225519', 'step': 19863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:42.259580', 'step': 19863, 'epoch': 3} {'type': 'loss', 'content': 0.18262173235416412, 'timestamp': '2025-09-30 22:36:42.291816', 'step': 19864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:42.322701', 'step': 19864, 'epoch': 3} {'type': 'loss', 'content': 0.10656102001667023, 'timestamp': '2025-09-30 22:36:42.327148', 'step': 19865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:42.361418', 'step': 19865, 'epoch': 3} {'type': 'loss', 'content': 0.10195081681013107, 'timestamp': '2025-09-30 22:36:42.366394', 'step': 19866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:42.399384', 'step': 19866, 'epoch': 3} {'type': 'loss', 'content': 0.16324660181999207, 'timestamp': '2025-09-30 22:36:42.403905', 'step': 19867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:42.436509', 'step': 19867, 'epoch': 3} {'type': 'loss', 'content': 0.04439375549554825, 'timestamp': '2025-09-30 22:36:42.465324', 'step': 19868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:42.499670', 'step': 19868, 'epoch': 3} {'type': 'loss', 'content': 0.08913525193929672, 'timestamp': '2025-09-30 22:36:42.504517', 'step': 19869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:42.537820', 'step': 19869, 'epoch': 3} {'type': 'loss', 'content': 0.06396110355854034, 'timestamp': '2025-09-30 22:36:42.543833', 'step': 19870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:42.580332', 'step': 19870, 'epoch': 3} {'type': 'loss', 'content': 0.050196968019008636, 'timestamp': '2025-09-30 22:36:42.582850', 'step': 19871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:42.615708', 'step': 19871, 'epoch': 3} {'type': 'loss', 'content': 0.08719668537378311, 'timestamp': '2025-09-30 22:36:42.642505', 'step': 19872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:42.692756', 'step': 19872, 'epoch': 3} {'type': 'loss', 'content': 0.08847823739051819, 'timestamp': '2025-09-30 22:36:42.696257', 'step': 19873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:42.729679', 'step': 19873, 'epoch': 3} {'type': 'loss', 'content': 0.06331309676170349, 'timestamp': '2025-09-30 22:36:42.734712', 'step': 19874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:42.769438', 'step': 19874, 'epoch': 3} {'type': 'loss', 'content': 0.08514174818992615, 'timestamp': '2025-09-30 22:36:42.774725', 'step': 19875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:42.815176', 'step': 19875, 'epoch': 3} {'type': 'loss', 'content': 0.06471629440784454, 'timestamp': '2025-09-30 22:36:42.843255', 'step': 19876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:42.883981', 'step': 19876, 'epoch': 3} {'type': 'loss', 'content': 0.07633267343044281, 'timestamp': '2025-09-30 22:36:42.888111', 'step': 19877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:42.918855', 'step': 19877, 'epoch': 3} {'type': 'loss', 'content': 0.06248503178358078, 'timestamp': '2025-09-30 22:36:42.932590', 'step': 19878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:42.967067', 'step': 19878, 'epoch': 3} {'type': 'loss', 'content': 0.06730564683675766, 'timestamp': '2025-09-30 22:36:42.971129', 'step': 19879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:43.006608', 'step': 19879, 'epoch': 3} {'type': 'loss', 'content': 0.06940905004739761, 'timestamp': '2025-09-30 22:36:43.031735', 'step': 19880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:43.065035', 'step': 19880, 'epoch': 3} {'type': 'loss', 'content': 0.061027318239212036, 'timestamp': '2025-09-30 22:36:43.070653', 'step': 19881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:43.105556', 'step': 19881, 'epoch': 3} {'type': 'loss', 'content': 0.14140577614307404, 'timestamp': '2025-09-30 22:36:43.115024', 'step': 19882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:43.154389', 'step': 19882, 'epoch': 3} {'type': 'loss', 'content': 0.017989037558436394, 'timestamp': '2025-09-30 22:36:43.159839', 'step': 19883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:43.199133', 'step': 19883, 'epoch': 3} {'type': 'loss', 'content': 0.04187959432601929, 'timestamp': '2025-09-30 22:36:43.223605', 'step': 19884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:43.265532', 'step': 19884, 'epoch': 3} {'type': 'loss', 'content': 0.09302361309528351, 'timestamp': '2025-09-30 22:36:43.269118', 'step': 19885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:43.298841', 'step': 19885, 'epoch': 3} {'type': 'loss', 'content': 0.1493459939956665, 'timestamp': '2025-09-30 22:36:43.305469', 'step': 19886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:43.336674', 'step': 19886, 'epoch': 3} {'type': 'loss', 'content': 0.06908523291349411, 'timestamp': '2025-09-30 22:36:43.340018', 'step': 19887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:43.372060', 'step': 19887, 'epoch': 3} {'type': 'loss', 'content': 0.028430109843611717, 'timestamp': '2025-09-30 22:36:43.398140', 'step': 19888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:43.431382', 'step': 19888, 'epoch': 3} {'type': 'loss', 'content': 0.17279265820980072, 'timestamp': '2025-09-30 22:36:43.434400', 'step': 19889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:43.466509', 'step': 19889, 'epoch': 3} {'type': 'loss', 'content': 0.0855102613568306, 'timestamp': '2025-09-30 22:36:43.481246', 'step': 19890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:43.523867', 'step': 19890, 'epoch': 3} {'type': 'loss', 'content': 0.04201338067650795, 'timestamp': '2025-09-30 22:36:43.526706', 'step': 19891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:43.557319', 'step': 19891, 'epoch': 3} {'type': 'loss', 'content': 0.010049328207969666, 'timestamp': '2025-09-30 22:36:43.580952', 'step': 19892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:43.612170', 'step': 19892, 'epoch': 3} {'type': 'loss', 'content': 0.10393376648426056, 'timestamp': '2025-09-30 22:36:43.615169', 'step': 19893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:43.645959', 'step': 19893, 'epoch': 3} {'type': 'loss', 'content': 0.09981109201908112, 'timestamp': '2025-09-30 22:36:43.661703', 'step': 19894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:43.693077', 'step': 19894, 'epoch': 3} {'type': 'loss', 'content': 0.04830582067370415, 'timestamp': '2025-09-30 22:36:43.695705', 'step': 19895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:43.727012', 'step': 19895, 'epoch': 3} {'type': 'loss', 'content': 0.08291538059711456, 'timestamp': '2025-09-30 22:36:43.753095', 'step': 19896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:43.790506', 'step': 19896, 'epoch': 3} {'type': 'loss', 'content': 0.07151191681623459, 'timestamp': '2025-09-30 22:36:43.805289', 'step': 19897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:43.837708', 'step': 19897, 'epoch': 3} {'type': 'loss', 'content': 0.07702784985303879, 'timestamp': '2025-09-30 22:36:43.851806', 'step': 19898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:43.884360', 'step': 19898, 'epoch': 3} {'type': 'loss', 'content': 0.19578590989112854, 'timestamp': '2025-09-30 22:36:43.889531', 'step': 19899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:43.939775', 'step': 19899, 'epoch': 3} {'type': 'loss', 'content': 0.08495375514030457, 'timestamp': '2025-09-30 22:36:43.965092', 'step': 19900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:43.998782', 'step': 19900, 'epoch': 3} {'type': 'loss', 'content': 0.09451484680175781, 'timestamp': '2025-09-30 22:36:44.003692', 'step': 19901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:44.036733', 'step': 19901, 'epoch': 3} {'type': 'loss', 'content': 0.06547536700963974, 'timestamp': '2025-09-30 22:36:44.048822', 'step': 19902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:44.082996', 'step': 19902, 'epoch': 3} {'type': 'loss', 'content': 0.05324641615152359, 'timestamp': '2025-09-30 22:36:44.088285', 'step': 19903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:44.119184', 'step': 19903, 'epoch': 3} {'type': 'loss', 'content': 0.10326436161994934, 'timestamp': '2025-09-30 22:36:44.149502', 'step': 19904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:44.182173', 'step': 19904, 'epoch': 3} {'type': 'loss', 'content': 0.06557609885931015, 'timestamp': '2025-09-30 22:36:44.185403', 'step': 19905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:44.218617', 'step': 19905, 'epoch': 3} {'type': 'loss', 'content': 0.08541379868984222, 'timestamp': '2025-09-30 22:36:44.223985', 'step': 19906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:44.257829', 'step': 19906, 'epoch': 3} {'type': 'loss', 'content': 0.15219449996948242, 'timestamp': '2025-09-30 22:36:44.261243', 'step': 19907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:44.292881', 'step': 19907, 'epoch': 3} {'type': 'loss', 'content': 0.08963032066822052, 'timestamp': '2025-09-30 22:36:44.317251', 'step': 19908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.352168', 'step': 19908, 'epoch': 3} {'type': 'loss', 'content': 0.047861892729997635, 'timestamp': '2025-09-30 22:36:44.364219', 'step': 19909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.397318', 'step': 19909, 'epoch': 3} {'type': 'loss', 'content': 0.058195386081933975, 'timestamp': '2025-09-30 22:36:44.401298', 'step': 19910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.433351', 'step': 19910, 'epoch': 3} {'type': 'loss', 'content': 0.13620229065418243, 'timestamp': '2025-09-30 22:36:44.436413', 'step': 19911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:44.467439', 'step': 19911, 'epoch': 3} {'type': 'loss', 'content': 0.09095554053783417, 'timestamp': '2025-09-30 22:36:44.493920', 'step': 19912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.526671', 'step': 19912, 'epoch': 3} {'type': 'loss', 'content': 0.06612128019332886, 'timestamp': '2025-09-30 22:36:44.529051', 'step': 19913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:44.563077', 'step': 19913, 'epoch': 3} {'type': 'loss', 'content': 0.14443710446357727, 'timestamp': '2025-09-30 22:36:44.569846', 'step': 19914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:44.601755', 'step': 19914, 'epoch': 3} {'type': 'loss', 'content': 0.09474994987249374, 'timestamp': '2025-09-30 22:36:44.605895', 'step': 19915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:44.638902', 'step': 19915, 'epoch': 3} {'type': 'loss', 'content': 0.049337297677993774, 'timestamp': '2025-09-30 22:36:44.664353', 'step': 19916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.699322', 'step': 19916, 'epoch': 3} {'type': 'loss', 'content': 0.06799580901861191, 'timestamp': '2025-09-30 22:36:44.702550', 'step': 19917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:44.735655', 'step': 19917, 'epoch': 3} {'type': 'loss', 'content': 0.05846783146262169, 'timestamp': '2025-09-30 22:36:44.741549', 'step': 19918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.775317', 'step': 19918, 'epoch': 3} {'type': 'loss', 'content': 0.1146320104598999, 'timestamp': '2025-09-30 22:36:44.781333', 'step': 19919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:36:44.813624', 'step': 19919, 'epoch': 3} {'type': 'loss', 'content': 0.04943520575761795, 'timestamp': '2025-09-30 22:36:44.839098', 'step': 19920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:44.871321', 'step': 19920, 'epoch': 3} {'type': 'loss', 'content': 0.06710657477378845, 'timestamp': '2025-09-30 22:36:44.873293', 'step': 19921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:44.904269', 'step': 19921, 'epoch': 3} {'type': 'loss', 'content': 0.052469853311777115, 'timestamp': '2025-09-30 22:36:44.909366', 'step': 19922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:44.943911', 'step': 19922, 'epoch': 3} {'type': 'loss', 'content': 0.13123354315757751, 'timestamp': '2025-09-30 22:36:44.948662', 'step': 19923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:44.981177', 'step': 19923, 'epoch': 3} {'type': 'loss', 'content': 0.06459555774927139, 'timestamp': '2025-09-30 22:36:45.007141', 'step': 19924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.039108', 'step': 19924, 'epoch': 3} {'type': 'loss', 'content': 0.027193190529942513, 'timestamp': '2025-09-30 22:36:45.042688', 'step': 19925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:45.076655', 'step': 19925, 'epoch': 3} {'type': 'loss', 'content': 0.04968425631523132, 'timestamp': '2025-09-30 22:36:45.079050', 'step': 19926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.109747', 'step': 19926, 'epoch': 3} {'type': 'loss', 'content': 0.07977403700351715, 'timestamp': '2025-09-30 22:36:45.123106', 'step': 19927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.165582', 'step': 19927, 'epoch': 3} {'type': 'loss', 'content': 0.08109044283628464, 'timestamp': '2025-09-30 22:36:45.192251', 'step': 19928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:45.231942', 'step': 19928, 'epoch': 3} {'type': 'loss', 'content': 0.0506691038608551, 'timestamp': '2025-09-30 22:36:45.234957', 'step': 19929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:45.269163', 'step': 19929, 'epoch': 3} {'type': 'loss', 'content': 0.05727620795369148, 'timestamp': '2025-09-30 22:36:45.273274', 'step': 19930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.317787', 'step': 19930, 'epoch': 3} {'type': 'loss', 'content': 0.09974969923496246, 'timestamp': '2025-09-30 22:36:45.325797', 'step': 19931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:45.358362', 'step': 19931, 'epoch': 3} {'type': 'loss', 'content': 0.06327766180038452, 'timestamp': '2025-09-30 22:36:45.384080', 'step': 19932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.415734', 'step': 19932, 'epoch': 3} {'type': 'loss', 'content': 0.10018189996480942, 'timestamp': '2025-09-30 22:36:45.422946', 'step': 19933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:45.459275', 'step': 19933, 'epoch': 3} {'type': 'loss', 'content': 0.09006267786026001, 'timestamp': '2025-09-30 22:36:45.462891', 'step': 19934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:45.497158', 'step': 19934, 'epoch': 3} {'type': 'loss', 'content': 0.038585785776376724, 'timestamp': '2025-09-30 22:36:45.510517', 'step': 19935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:45.543839', 'step': 19935, 'epoch': 3} {'type': 'loss', 'content': 0.05905965715646744, 'timestamp': '2025-09-30 22:36:45.569265', 'step': 19936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:45.601596', 'step': 19936, 'epoch': 3} {'type': 'loss', 'content': 0.08182597905397415, 'timestamp': '2025-09-30 22:36:45.606286', 'step': 19937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:45.637414', 'step': 19937, 'epoch': 3} {'type': 'loss', 'content': 0.1287064552307129, 'timestamp': '2025-09-30 22:36:45.639923', 'step': 19938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:45.671279', 'step': 19938, 'epoch': 3} {'type': 'loss', 'content': 0.08750062435865402, 'timestamp': '2025-09-30 22:36:45.674361', 'step': 19939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.707857', 'step': 19939, 'epoch': 3} {'type': 'loss', 'content': 0.06402919441461563, 'timestamp': '2025-09-30 22:36:45.732882', 'step': 19940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.767325', 'step': 19940, 'epoch': 3} {'type': 'loss', 'content': 0.022877831012010574, 'timestamp': '2025-09-30 22:36:45.770492', 'step': 19941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:45.808548', 'step': 19941, 'epoch': 3} {'type': 'loss', 'content': 0.06988251209259033, 'timestamp': '2025-09-30 22:36:45.812319', 'step': 19942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:45.844322', 'step': 19942, 'epoch': 3} {'type': 'loss', 'content': 0.07849022001028061, 'timestamp': '2025-09-30 22:36:45.846880', 'step': 19943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:45.877621', 'step': 19943, 'epoch': 3} {'type': 'loss', 'content': 0.07331608235836029, 'timestamp': '2025-09-30 22:36:45.902945', 'step': 19944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:45.933945', 'step': 19944, 'epoch': 3} {'type': 'loss', 'content': 0.11593834310770035, 'timestamp': '2025-09-30 22:36:45.936505', 'step': 19945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:45.972680', 'step': 19945, 'epoch': 3} {'type': 'loss', 'content': 0.07999235391616821, 'timestamp': '2025-09-30 22:36:45.977355', 'step': 19946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.013206', 'step': 19946, 'epoch': 3} {'type': 'loss', 'content': 0.1203891709446907, 'timestamp': '2025-09-30 22:36:46.017578', 'step': 19947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:46.061157', 'step': 19947, 'epoch': 3} {'type': 'loss', 'content': 0.0695628970861435, 'timestamp': '2025-09-30 22:36:46.088170', 'step': 19948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:46.125273', 'step': 19948, 'epoch': 3} {'type': 'loss', 'content': 0.07779063284397125, 'timestamp': '2025-09-30 22:36:46.139000', 'step': 19949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:46.176881', 'step': 19949, 'epoch': 3} {'type': 'loss', 'content': 0.05767500400543213, 'timestamp': '2025-09-30 22:36:46.186728', 'step': 19950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:46.246740', 'step': 19950, 'epoch': 3} {'type': 'loss', 'content': 0.07962072640657425, 'timestamp': '2025-09-30 22:36:46.254717', 'step': 19951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.287309', 'step': 19951, 'epoch': 3} {'type': 'loss', 'content': 0.07530296593904495, 'timestamp': '2025-09-30 22:36:46.317812', 'step': 19952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:46.351324', 'step': 19952, 'epoch': 3} {'type': 'loss', 'content': 0.06988538801670074, 'timestamp': '2025-09-30 22:36:46.355127', 'step': 19953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:46.386538', 'step': 19953, 'epoch': 3} {'type': 'loss', 'content': 0.07274511456489563, 'timestamp': '2025-09-30 22:36:46.400108', 'step': 19954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.433411', 'step': 19954, 'epoch': 3} {'type': 'loss', 'content': 0.05379137024283409, 'timestamp': '2025-09-30 22:36:46.439057', 'step': 19955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.472157', 'step': 19955, 'epoch': 3} {'type': 'loss', 'content': 0.07813816517591476, 'timestamp': '2025-09-30 22:36:46.496499', 'step': 19956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:46.533161', 'step': 19956, 'epoch': 3} {'type': 'loss', 'content': 0.09131785482168198, 'timestamp': '2025-09-30 22:36:46.548449', 'step': 19957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:46.579420', 'step': 19957, 'epoch': 3} {'type': 'loss', 'content': 0.11375650018453598, 'timestamp': '2025-09-30 22:36:46.587041', 'step': 19958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.618718', 'step': 19958, 'epoch': 3} {'type': 'loss', 'content': 0.0372014045715332, 'timestamp': '2025-09-30 22:36:46.624350', 'step': 19959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.658798', 'step': 19959, 'epoch': 3} {'type': 'loss', 'content': 0.08299267292022705, 'timestamp': '2025-09-30 22:36:46.683919', 'step': 19960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.726061', 'step': 19960, 'epoch': 3} {'type': 'loss', 'content': 0.03370529040694237, 'timestamp': '2025-09-30 22:36:46.732394', 'step': 19961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:46.766790', 'step': 19961, 'epoch': 3} {'type': 'loss', 'content': 0.09019052982330322, 'timestamp': '2025-09-30 22:36:46.771232', 'step': 19962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.811829', 'step': 19962, 'epoch': 3} {'type': 'loss', 'content': 0.0861726924777031, 'timestamp': '2025-09-30 22:36:46.817103', 'step': 19963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:46.849594', 'step': 19963, 'epoch': 3} {'type': 'loss', 'content': 0.04867099970579147, 'timestamp': '2025-09-30 22:36:46.881044', 'step': 19964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:46.912777', 'step': 19964, 'epoch': 3} {'type': 'loss', 'content': 0.0660555362701416, 'timestamp': '2025-09-30 22:36:46.925428', 'step': 19965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:46.969535', 'step': 19965, 'epoch': 3} {'type': 'loss', 'content': 0.0370703749358654, 'timestamp': '2025-09-30 22:36:46.975517', 'step': 19966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.008555', 'step': 19966, 'epoch': 3} {'type': 'loss', 'content': 0.08262253552675247, 'timestamp': '2025-09-30 22:36:47.014369', 'step': 19967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.049069', 'step': 19967, 'epoch': 3} {'type': 'loss', 'content': 0.030034415423870087, 'timestamp': '2025-09-30 22:36:47.076333', 'step': 19968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:36:47.109388', 'step': 19968, 'epoch': 3} {'type': 'loss', 'content': 0.07247956097126007, 'timestamp': '2025-09-30 22:36:47.114994', 'step': 19969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.150216', 'step': 19969, 'epoch': 3} {'type': 'loss', 'content': 0.08066389709711075, 'timestamp': '2025-09-30 22:36:47.152726', 'step': 19970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:47.190333', 'step': 19970, 'epoch': 3} {'type': 'loss', 'content': 0.03495184704661369, 'timestamp': '2025-09-30 22:36:47.194749', 'step': 19971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:47.227393', 'step': 19971, 'epoch': 3} {'type': 'loss', 'content': 0.08969885110855103, 'timestamp': '2025-09-30 22:36:47.252102', 'step': 19972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:47.288925', 'step': 19972, 'epoch': 3} {'type': 'loss', 'content': 0.05117034167051315, 'timestamp': '2025-09-30 22:36:47.293129', 'step': 19973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:47.329517', 'step': 19973, 'epoch': 3} {'type': 'loss', 'content': 0.052552901208400726, 'timestamp': '2025-09-30 22:36:47.336785', 'step': 19974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:47.373398', 'step': 19974, 'epoch': 3} {'type': 'loss', 'content': 0.07433857023715973, 'timestamp': '2025-09-30 22:36:47.378123', 'step': 19975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.408593', 'step': 19975, 'epoch': 3} {'type': 'loss', 'content': 0.025568928569555283, 'timestamp': '2025-09-30 22:36:47.435540', 'step': 19976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:47.468294', 'step': 19976, 'epoch': 3} {'type': 'loss', 'content': 0.1324661523103714, 'timestamp': '2025-09-30 22:36:47.474268', 'step': 19977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.506277', 'step': 19977, 'epoch': 3} {'type': 'loss', 'content': 0.032286468893289566, 'timestamp': '2025-09-30 22:36:47.511700', 'step': 19978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:47.544689', 'step': 19978, 'epoch': 3} {'type': 'loss', 'content': 0.05221759155392647, 'timestamp': '2025-09-30 22:36:47.555603', 'step': 19979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:47.586558', 'step': 19979, 'epoch': 3} {'type': 'loss', 'content': 0.06434953212738037, 'timestamp': '2025-09-30 22:36:47.612787', 'step': 19980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:47.643325', 'step': 19980, 'epoch': 3} {'type': 'loss', 'content': 0.042937543243169785, 'timestamp': '2025-09-30 22:36:47.657021', 'step': 19981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.693395', 'step': 19981, 'epoch': 3} {'type': 'loss', 'content': 0.03261701017618179, 'timestamp': '2025-09-30 22:36:47.698197', 'step': 19982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.730376', 'step': 19982, 'epoch': 3} {'type': 'loss', 'content': 0.03136172518134117, 'timestamp': '2025-09-30 22:36:47.733660', 'step': 19983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.768674', 'step': 19983, 'epoch': 3} {'type': 'loss', 'content': 0.07356354594230652, 'timestamp': '2025-09-30 22:36:47.798303', 'step': 19984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.836732', 'step': 19984, 'epoch': 3} {'type': 'loss', 'content': 0.07326675951480865, 'timestamp': '2025-09-30 22:36:47.840190', 'step': 19985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:47.881538', 'step': 19985, 'epoch': 3} {'type': 'loss', 'content': 0.07134971767663956, 'timestamp': '2025-09-30 22:36:47.894508', 'step': 19986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:47.931360', 'step': 19986, 'epoch': 3} {'type': 'loss', 'content': 0.04967346787452698, 'timestamp': '2025-09-30 22:36:47.937857', 'step': 19987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:47.983367', 'step': 19987, 'epoch': 3} {'type': 'loss', 'content': 0.009406078606843948, 'timestamp': '2025-09-30 22:36:48.020481', 'step': 19988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:36:48.056474', 'step': 19988, 'epoch': 3} {'type': 'loss', 'content': 0.038349829614162445, 'timestamp': '2025-09-30 22:36:48.061196', 'step': 19989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:48.092488', 'step': 19989, 'epoch': 3} {'type': 'loss', 'content': 0.07754406332969666, 'timestamp': '2025-09-30 22:36:48.098845', 'step': 19990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:48.133339', 'step': 19990, 'epoch': 3} {'type': 'loss', 'content': 0.06106824055314064, 'timestamp': '2025-09-30 22:36:48.136939', 'step': 19991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:48.170583', 'step': 19991, 'epoch': 3} {'type': 'loss', 'content': 0.022212129086256027, 'timestamp': '2025-09-30 22:36:48.196901', 'step': 19992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:48.228931', 'step': 19992, 'epoch': 3} {'type': 'loss', 'content': 0.034905701875686646, 'timestamp': '2025-09-30 22:36:48.235926', 'step': 19993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:48.273060', 'step': 19993, 'epoch': 3} {'type': 'loss', 'content': 0.06413359194993973, 'timestamp': '2025-09-30 22:36:48.276455', 'step': 19994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:48.307539', 'step': 19994, 'epoch': 3} {'type': 'loss', 'content': 0.051552664488554, 'timestamp': '2025-09-30 22:36:48.311535', 'step': 19995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:48.342699', 'step': 19995, 'epoch': 3} {'type': 'loss', 'content': 0.05663561075925827, 'timestamp': '2025-09-30 22:36:48.368224', 'step': 19996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:48.403380', 'step': 19996, 'epoch': 3} {'type': 'loss', 'content': 0.034770190715789795, 'timestamp': '2025-09-30 22:36:48.407556', 'step': 19997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:48.440396', 'step': 19997, 'epoch': 3} {'type': 'loss', 'content': 0.07772939652204514, 'timestamp': '2025-09-30 22:36:48.448669', 'step': 19998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:48.487872', 'step': 19998, 'epoch': 3} {'type': 'loss', 'content': 0.07826533168554306, 'timestamp': '2025-09-30 22:36:48.491972', 'step': 19999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:48.523021', 'step': 19999, 'epoch': 3} {'type': 'loss', 'content': 0.011157545261085033, 'timestamp': '2025-09-30 22:36:48.549408', 'step': 20000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20000', 'timestamp': '2025-09-30 22:36:53.724886', 'step': 20000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:53.757775', 'step': 20000, 'epoch': 3} {'type': 'loss', 'content': 0.05152692273259163, 'timestamp': '2025-09-30 22:36:53.761720', 'step': 20001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:53.800587', 'step': 20001, 'epoch': 3} {'type': 'loss', 'content': 0.14632399380207062, 'timestamp': '2025-09-30 22:36:53.803865', 'step': 20002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:53.836881', 'step': 20002, 'epoch': 3} {'type': 'loss', 'content': 0.06031939014792442, 'timestamp': '2025-09-30 22:36:53.840579', 'step': 20003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:53.871815', 'step': 20003, 'epoch': 3} {'type': 'loss', 'content': 0.03641722723841667, 'timestamp': '2025-09-30 22:36:53.897494', 'step': 20004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:53.928621', 'step': 20004, 'epoch': 3} {'type': 'loss', 'content': 0.05228292569518089, 'timestamp': '2025-09-30 22:36:53.934977', 'step': 20005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:53.968082', 'step': 20005, 'epoch': 3} {'type': 'loss', 'content': 0.0691525861620903, 'timestamp': '2025-09-30 22:36:53.971720', 'step': 20006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:54.005985', 'step': 20006, 'epoch': 3} {'type': 'loss', 'content': 0.11306504160165787, 'timestamp': '2025-09-30 22:36:54.008933', 'step': 20007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.041851', 'step': 20007, 'epoch': 3} {'type': 'loss', 'content': 0.06027434766292572, 'timestamp': '2025-09-30 22:36:54.067075', 'step': 20008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:54.098738', 'step': 20008, 'epoch': 3} {'type': 'loss', 'content': 0.04791712388396263, 'timestamp': '2025-09-30 22:36:54.103886', 'step': 20009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.134240', 'step': 20009, 'epoch': 3} {'type': 'loss', 'content': 0.12603729963302612, 'timestamp': '2025-09-30 22:36:54.136846', 'step': 20010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.169368', 'step': 20010, 'epoch': 3} {'type': 'loss', 'content': 0.06620454788208008, 'timestamp': '2025-09-30 22:36:54.173924', 'step': 20011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:54.205557', 'step': 20011, 'epoch': 3} {'type': 'loss', 'content': 0.11798267066478729, 'timestamp': '2025-09-30 22:36:54.229890', 'step': 20012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.260509', 'step': 20012, 'epoch': 3} {'type': 'loss', 'content': 0.01943184994161129, 'timestamp': '2025-09-30 22:36:54.265026', 'step': 20013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.296327', 'step': 20013, 'epoch': 3} {'type': 'loss', 'content': 0.08041109144687653, 'timestamp': '2025-09-30 22:36:54.301062', 'step': 20014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.333595', 'step': 20014, 'epoch': 3} {'type': 'loss', 'content': 0.13484396040439606, 'timestamp': '2025-09-30 22:36:54.338092', 'step': 20015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.371602', 'step': 20015, 'epoch': 3} {'type': 'loss', 'content': 0.09824812412261963, 'timestamp': '2025-09-30 22:36:54.404225', 'step': 20016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.434905', 'step': 20016, 'epoch': 3} {'type': 'loss', 'content': 0.07334896922111511, 'timestamp': '2025-09-30 22:36:54.439360', 'step': 20017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.470504', 'step': 20017, 'epoch': 3} {'type': 'loss', 'content': 0.06939014792442322, 'timestamp': '2025-09-30 22:36:54.472812', 'step': 20018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.506053', 'step': 20018, 'epoch': 3} {'type': 'loss', 'content': 0.1269523650407791, 'timestamp': '2025-09-30 22:36:54.510253', 'step': 20019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:54.541059', 'step': 20019, 'epoch': 3} {'type': 'loss', 'content': 0.077876977622509, 'timestamp': '2025-09-30 22:36:54.568120', 'step': 20020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:54.599362', 'step': 20020, 'epoch': 3} {'type': 'loss', 'content': 0.026609785854816437, 'timestamp': '2025-09-30 22:36:54.601884', 'step': 20021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.637331', 'step': 20021, 'epoch': 3} {'type': 'loss', 'content': 0.07780029624700546, 'timestamp': '2025-09-30 22:36:54.641812', 'step': 20022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.674978', 'step': 20022, 'epoch': 3} {'type': 'loss', 'content': 0.12468872219324112, 'timestamp': '2025-09-30 22:36:54.678087', 'step': 20023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.708553', 'step': 20023, 'epoch': 3} {'type': 'loss', 'content': 0.11203673481941223, 'timestamp': '2025-09-30 22:36:54.735071', 'step': 20024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.766074', 'step': 20024, 'epoch': 3} {'type': 'loss', 'content': 0.040612053126096725, 'timestamp': '2025-09-30 22:36:54.769427', 'step': 20025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.813351', 'step': 20025, 'epoch': 3} {'type': 'loss', 'content': 0.02414117380976677, 'timestamp': '2025-09-30 22:36:54.820054', 'step': 20026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:54.853875', 'step': 20026, 'epoch': 3} {'type': 'loss', 'content': 0.07356518507003784, 'timestamp': '2025-09-30 22:36:54.856559', 'step': 20027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.887434', 'step': 20027, 'epoch': 3} {'type': 'loss', 'content': 0.08011390268802643, 'timestamp': '2025-09-30 22:36:54.912251', 'step': 20028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:54.943100', 'step': 20028, 'epoch': 3} {'type': 'loss', 'content': 0.1159534826874733, 'timestamp': '2025-09-30 22:36:54.945586', 'step': 20029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:36:54.976943', 'step': 20029, 'epoch': 3} {'type': 'loss', 'content': 0.1332477331161499, 'timestamp': '2025-09-30 22:36:54.980294', 'step': 20030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:55.014294', 'step': 20030, 'epoch': 3} {'type': 'loss', 'content': 0.06089591979980469, 'timestamp': '2025-09-30 22:36:55.017003', 'step': 20031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:55.047725', 'step': 20031, 'epoch': 3} {'type': 'loss', 'content': 0.05371524766087532, 'timestamp': '2025-09-30 22:36:55.072397', 'step': 20032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:55.103376', 'step': 20032, 'epoch': 3} {'type': 'loss', 'content': 0.0864618569612503, 'timestamp': '2025-09-30 22:36:55.106683', 'step': 20033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:36:55.139323', 'step': 20033, 'epoch': 3} {'type': 'loss', 'content': 0.08725343644618988, 'timestamp': '2025-09-30 22:36:55.141676', 'step': 20034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:55.173667', 'step': 20034, 'epoch': 3} {'type': 'loss', 'content': 0.1032906174659729, 'timestamp': '2025-09-30 22:36:55.180429', 'step': 20035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:36:55.215178', 'step': 20035, 'epoch': 3} {'type': 'loss', 'content': 0.04378955438733101, 'timestamp': '2025-09-30 22:36:55.239416', 'step': 20036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:36:55.271196', 'step': 20036, 'epoch': 3} {'type': 'loss', 'content': 0.08033902943134308, 'timestamp': '2025-09-30 22:36:55.274011', 'step': 20037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:36:55.305325', 'step': 20037, 'epoch': 3} {'type': 'loss', 'content': 0.10073182731866837, 'timestamp': '2025-09-30 22:36:55.309075', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:37:02.983811', 'step': 20038, 'epoch': 3} {'type': 'pplx', 'content': 11142.442322239374, 'timestamp': '2025-09-30 22:37:02.989774', 'step': 20038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.021303', 'step': 20038, 'epoch': 3} {'type': 'loss', 'content': 0.05723896622657776, 'timestamp': '2025-09-30 22:37:03.027743', 'step': 20039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.062914', 'step': 20039, 'epoch': 3} {'type': 'loss', 'content': 0.13704729080200195, 'timestamp': '2025-09-30 22:37:03.091958', 'step': 20040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.124491', 'step': 20040, 'epoch': 3} {'type': 'loss', 'content': 0.06822357326745987, 'timestamp': '2025-09-30 22:37:03.128713', 'step': 20041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.162161', 'step': 20041, 'epoch': 3} {'type': 'loss', 'content': 0.05001421645283699, 'timestamp': '2025-09-30 22:37:03.169008', 'step': 20042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:03.205657', 'step': 20042, 'epoch': 3} {'type': 'loss', 'content': 0.047917015850543976, 'timestamp': '2025-09-30 22:37:03.210277', 'step': 20043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.244570', 'step': 20043, 'epoch': 3} {'type': 'loss', 'content': 0.06636369973421097, 'timestamp': '2025-09-30 22:37:03.272031', 'step': 20044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.305193', 'step': 20044, 'epoch': 3} {'type': 'loss', 'content': 0.03419477865099907, 'timestamp': '2025-09-30 22:37:03.309441', 'step': 20045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:03.341037', 'step': 20045, 'epoch': 3} {'type': 'loss', 'content': 0.06674619019031525, 'timestamp': '2025-09-30 22:37:03.345391', 'step': 20046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.385588', 'step': 20046, 'epoch': 3} {'type': 'loss', 'content': 0.025898203253746033, 'timestamp': '2025-09-30 22:37:03.389248', 'step': 20047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.430469', 'step': 20047, 'epoch': 3} {'type': 'loss', 'content': 0.038755640387535095, 'timestamp': '2025-09-30 22:37:03.456263', 'step': 20048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.486591', 'step': 20048, 'epoch': 3} {'type': 'loss', 'content': 0.03652147576212883, 'timestamp': '2025-09-30 22:37:03.489927', 'step': 20049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:03.520625', 'step': 20049, 'epoch': 3} {'type': 'loss', 'content': 0.07380983233451843, 'timestamp': '2025-09-30 22:37:03.533650', 'step': 20050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.567866', 'step': 20050, 'epoch': 3} {'type': 'loss', 'content': 0.053539056330919266, 'timestamp': '2025-09-30 22:37:03.571331', 'step': 20051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.602200', 'step': 20051, 'epoch': 3} {'type': 'loss', 'content': 0.028766898438334465, 'timestamp': '2025-09-30 22:37:03.627542', 'step': 20052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.658166', 'step': 20052, 'epoch': 3} {'type': 'loss', 'content': 0.04500657320022583, 'timestamp': '2025-09-30 22:37:03.662900', 'step': 20053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:03.694741', 'step': 20053, 'epoch': 3} {'type': 'loss', 'content': 0.05210430175065994, 'timestamp': '2025-09-30 22:37:03.700701', 'step': 20054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:03.737786', 'step': 20054, 'epoch': 3} {'type': 'loss', 'content': 0.034856781363487244, 'timestamp': '2025-09-30 22:37:03.742660', 'step': 20055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.775887', 'step': 20055, 'epoch': 3} {'type': 'loss', 'content': 0.11113519966602325, 'timestamp': '2025-09-30 22:37:03.800498', 'step': 20056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.833471', 'step': 20056, 'epoch': 3} {'type': 'loss', 'content': 0.055069699883461, 'timestamp': '2025-09-30 22:37:03.835829', 'step': 20057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.867169', 'step': 20057, 'epoch': 3} {'type': 'loss', 'content': 0.059278082102537155, 'timestamp': '2025-09-30 22:37:03.870758', 'step': 20058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:03.902040', 'step': 20058, 'epoch': 3} {'type': 'loss', 'content': 0.1730823814868927, 'timestamp': '2025-09-30 22:37:03.905600', 'step': 20059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:03.937811', 'step': 20059, 'epoch': 3} {'type': 'loss', 'content': 0.05710285156965256, 'timestamp': '2025-09-30 22:37:03.962591', 'step': 20060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:03.993458', 'step': 20060, 'epoch': 3} {'type': 'loss', 'content': 0.12588226795196533, 'timestamp': '2025-09-30 22:37:03.997728', 'step': 20061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:04.039416', 'step': 20061, 'epoch': 3} {'type': 'loss', 'content': 0.029566030949354172, 'timestamp': '2025-09-30 22:37:04.043497', 'step': 20062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:04.074615', 'step': 20062, 'epoch': 3} {'type': 'loss', 'content': 0.028875628486275673, 'timestamp': '2025-09-30 22:37:04.083471', 'step': 20063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.115676', 'step': 20063, 'epoch': 3} {'type': 'loss', 'content': 0.06497305631637573, 'timestamp': '2025-09-30 22:37:04.141272', 'step': 20064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.172874', 'step': 20064, 'epoch': 3} {'type': 'loss', 'content': 0.03699357062578201, 'timestamp': '2025-09-30 22:37:04.177354', 'step': 20065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:04.211403', 'step': 20065, 'epoch': 3} {'type': 'loss', 'content': 0.049595486372709274, 'timestamp': '2025-09-30 22:37:04.215395', 'step': 20066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:04.247491', 'step': 20066, 'epoch': 3} {'type': 'loss', 'content': 0.03234073147177696, 'timestamp': '2025-09-30 22:37:04.254396', 'step': 20067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:04.284669', 'step': 20067, 'epoch': 3} {'type': 'loss', 'content': 0.05460715666413307, 'timestamp': '2025-09-30 22:37:04.309300', 'step': 20068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:04.340635', 'step': 20068, 'epoch': 3} {'type': 'loss', 'content': 0.005754828918725252, 'timestamp': '2025-09-30 22:37:04.358516', 'step': 20069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.391311', 'step': 20069, 'epoch': 3} {'type': 'loss', 'content': 0.08303939551115036, 'timestamp': '2025-09-30 22:37:04.396127', 'step': 20070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:04.430020', 'step': 20070, 'epoch': 3} {'type': 'loss', 'content': 0.049632780253887177, 'timestamp': '2025-09-30 22:37:04.434503', 'step': 20071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:04.466897', 'step': 20071, 'epoch': 3} {'type': 'loss', 'content': 0.09106117486953735, 'timestamp': '2025-09-30 22:37:04.491830', 'step': 20072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:04.526877', 'step': 20072, 'epoch': 3} {'type': 'loss', 'content': 0.08795705437660217, 'timestamp': '2025-09-30 22:37:04.532500', 'step': 20073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:04.563330', 'step': 20073, 'epoch': 3} {'type': 'loss', 'content': 0.05599503964185715, 'timestamp': '2025-09-30 22:37:04.566715', 'step': 20074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:04.599523', 'step': 20074, 'epoch': 3} {'type': 'loss', 'content': 0.07998476177453995, 'timestamp': '2025-09-30 22:37:04.602730', 'step': 20075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:04.633946', 'step': 20075, 'epoch': 3} {'type': 'loss', 'content': 0.050487909466028214, 'timestamp': '2025-09-30 22:37:04.661627', 'step': 20076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.692749', 'step': 20076, 'epoch': 3} {'type': 'loss', 'content': 0.04888167977333069, 'timestamp': '2025-09-30 22:37:04.695856', 'step': 20077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.726162', 'step': 20077, 'epoch': 3} {'type': 'loss', 'content': 0.04993867501616478, 'timestamp': '2025-09-30 22:37:04.734479', 'step': 20078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.764824', 'step': 20078, 'epoch': 3} {'type': 'loss', 'content': 0.1318439096212387, 'timestamp': '2025-09-30 22:37:04.769402', 'step': 20079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.801861', 'step': 20079, 'epoch': 3} {'type': 'loss', 'content': 0.14786465466022491, 'timestamp': '2025-09-30 22:37:04.828936', 'step': 20080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.861873', 'step': 20080, 'epoch': 3} {'type': 'loss', 'content': 0.01113130897283554, 'timestamp': '2025-09-30 22:37:04.865140', 'step': 20081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:04.895804', 'step': 20081, 'epoch': 3} {'type': 'loss', 'content': 0.05506094545125961, 'timestamp': '2025-09-30 22:37:04.909011', 'step': 20082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:04.941376', 'step': 20082, 'epoch': 3} {'type': 'loss', 'content': 0.08521147072315216, 'timestamp': '2025-09-30 22:37:04.943759', 'step': 20083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:04.975782', 'step': 20083, 'epoch': 3} {'type': 'loss', 'content': 0.09080706536769867, 'timestamp': '2025-09-30 22:37:05.002093', 'step': 20084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:05.032865', 'step': 20084, 'epoch': 3} {'type': 'loss', 'content': 0.07695046812295914, 'timestamp': '2025-09-30 22:37:05.037392', 'step': 20085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.068496', 'step': 20085, 'epoch': 3} {'type': 'loss', 'content': 0.06386080384254456, 'timestamp': '2025-09-30 22:37:05.074367', 'step': 20086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:05.113885', 'step': 20086, 'epoch': 3} {'type': 'loss', 'content': 0.13602980971336365, 'timestamp': '2025-09-30 22:37:05.117915', 'step': 20087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:05.150798', 'step': 20087, 'epoch': 3} {'type': 'loss', 'content': 0.03287952393293381, 'timestamp': '2025-09-30 22:37:05.177090', 'step': 20088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:05.207348', 'step': 20088, 'epoch': 3} {'type': 'loss', 'content': 0.10332731902599335, 'timestamp': '2025-09-30 22:37:05.212225', 'step': 20089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:05.243082', 'step': 20089, 'epoch': 3} {'type': 'loss', 'content': 0.052819229662418365, 'timestamp': '2025-09-30 22:37:05.252908', 'step': 20090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:05.285441', 'step': 20090, 'epoch': 3} {'type': 'loss', 'content': 0.10268142074346542, 'timestamp': '2025-09-30 22:37:05.289314', 'step': 20091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.320390', 'step': 20091, 'epoch': 3} {'type': 'loss', 'content': 0.08717581629753113, 'timestamp': '2025-09-30 22:37:05.345413', 'step': 20092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:05.379633', 'step': 20092, 'epoch': 3} {'type': 'loss', 'content': 0.007031059358268976, 'timestamp': '2025-09-30 22:37:05.384611', 'step': 20093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.417415', 'step': 20093, 'epoch': 3} {'type': 'loss', 'content': 0.0679069384932518, 'timestamp': '2025-09-30 22:37:05.421463', 'step': 20094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:05.453169', 'step': 20094, 'epoch': 3} {'type': 'loss', 'content': 0.07003398984670639, 'timestamp': '2025-09-30 22:37:05.457318', 'step': 20095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.489470', 'step': 20095, 'epoch': 3} {'type': 'loss', 'content': 0.056495122611522675, 'timestamp': '2025-09-30 22:37:05.513493', 'step': 20096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:37:05.546078', 'step': 20096, 'epoch': 3} {'type': 'loss', 'content': 0.0429873950779438, 'timestamp': '2025-09-30 22:37:05.550343', 'step': 20097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:05.582831', 'step': 20097, 'epoch': 3} {'type': 'loss', 'content': 0.019411783665418625, 'timestamp': '2025-09-30 22:37:05.596037', 'step': 20098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:05.627184', 'step': 20098, 'epoch': 3} {'type': 'loss', 'content': 0.022028746083378792, 'timestamp': '2025-09-30 22:37:05.633329', 'step': 20099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:05.664722', 'step': 20099, 'epoch': 3} {'type': 'loss', 'content': 0.05920780450105667, 'timestamp': '2025-09-30 22:37:05.690376', 'step': 20100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.720801', 'step': 20100, 'epoch': 3} {'type': 'loss', 'content': 0.06824402511119843, 'timestamp': '2025-09-30 22:37:05.724255', 'step': 20101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:05.762262', 'step': 20101, 'epoch': 3} {'type': 'loss', 'content': 0.10966194421052933, 'timestamp': '2025-09-30 22:37:05.771120', 'step': 20102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.804600', 'step': 20102, 'epoch': 3} {'type': 'loss', 'content': 0.10226122289896011, 'timestamp': '2025-09-30 22:37:05.811291', 'step': 20103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.845072', 'step': 20103, 'epoch': 3} {'type': 'loss', 'content': 0.0685703381896019, 'timestamp': '2025-09-30 22:37:05.871243', 'step': 20104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:05.901882', 'step': 20104, 'epoch': 3} {'type': 'loss', 'content': 0.0716436579823494, 'timestamp': '2025-09-30 22:37:05.904773', 'step': 20105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:05.935652', 'step': 20105, 'epoch': 3} {'type': 'loss', 'content': 0.0740051195025444, 'timestamp': '2025-09-30 22:37:05.940077', 'step': 20106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:05.978511', 'step': 20106, 'epoch': 3} {'type': 'loss', 'content': 0.025694720447063446, 'timestamp': '2025-09-30 22:37:05.982356', 'step': 20107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.015620', 'step': 20107, 'epoch': 3} {'type': 'loss', 'content': 0.11397796869277954, 'timestamp': '2025-09-30 22:37:06.040621', 'step': 20108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.073899', 'step': 20108, 'epoch': 3} {'type': 'loss', 'content': 0.0641443207859993, 'timestamp': '2025-09-30 22:37:06.085663', 'step': 20109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:06.121482', 'step': 20109, 'epoch': 3} {'type': 'loss', 'content': 0.045152612030506134, 'timestamp': '2025-09-30 22:37:06.126206', 'step': 20110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.165313', 'step': 20110, 'epoch': 3} {'type': 'loss', 'content': 0.0452236607670784, 'timestamp': '2025-09-30 22:37:06.176136', 'step': 20111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:06.216470', 'step': 20111, 'epoch': 3} {'type': 'loss', 'content': 0.08933184295892715, 'timestamp': '2025-09-30 22:37:06.242095', 'step': 20112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:06.275426', 'step': 20112, 'epoch': 3} {'type': 'loss', 'content': 0.06514845043420792, 'timestamp': '2025-09-30 22:37:06.282268', 'step': 20113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.314316', 'step': 20113, 'epoch': 3} {'type': 'loss', 'content': 0.04131251201033592, 'timestamp': '2025-09-30 22:37:06.327984', 'step': 20114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:06.363153', 'step': 20114, 'epoch': 3} {'type': 'loss', 'content': 0.03906386345624924, 'timestamp': '2025-09-30 22:37:06.366489', 'step': 20115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.397590', 'step': 20115, 'epoch': 3} {'type': 'loss', 'content': 0.05692634731531143, 'timestamp': '2025-09-30 22:37:06.423093', 'step': 20116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:06.458090', 'step': 20116, 'epoch': 3} {'type': 'loss', 'content': 0.1448380947113037, 'timestamp': '2025-09-30 22:37:06.463336', 'step': 20117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.495446', 'step': 20117, 'epoch': 3} {'type': 'loss', 'content': 0.07215754687786102, 'timestamp': '2025-09-30 22:37:06.509476', 'step': 20118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:06.540588', 'step': 20118, 'epoch': 3} {'type': 'loss', 'content': 0.04688333347439766, 'timestamp': '2025-09-30 22:37:06.543477', 'step': 20119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:06.576161', 'step': 20119, 'epoch': 3} {'type': 'loss', 'content': 0.13113591074943542, 'timestamp': '2025-09-30 22:37:06.616912', 'step': 20120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.656015', 'step': 20120, 'epoch': 3} {'type': 'loss', 'content': 0.1222226545214653, 'timestamp': '2025-09-30 22:37:06.660620', 'step': 20121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:06.693624', 'step': 20121, 'epoch': 3} {'type': 'loss', 'content': 0.10320226103067398, 'timestamp': '2025-09-30 22:37:06.695851', 'step': 20122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:06.738088', 'step': 20122, 'epoch': 3} {'type': 'loss', 'content': 0.04511100798845291, 'timestamp': '2025-09-30 22:37:06.755792', 'step': 20123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:06.787156', 'step': 20123, 'epoch': 3} {'type': 'loss', 'content': 0.04622999206185341, 'timestamp': '2025-09-30 22:37:06.820402', 'step': 20124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:06.857110', 'step': 20124, 'epoch': 3} {'type': 'loss', 'content': 0.032681629061698914, 'timestamp': '2025-09-30 22:37:06.866516', 'step': 20125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:06.901517', 'step': 20125, 'epoch': 3} {'type': 'loss', 'content': 0.06027893349528313, 'timestamp': '2025-09-30 22:37:06.908581', 'step': 20126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:06.941635', 'step': 20126, 'epoch': 3} {'type': 'loss', 'content': 0.051705703139305115, 'timestamp': '2025-09-30 22:37:06.955432', 'step': 20127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:06.998216', 'step': 20127, 'epoch': 3} {'type': 'loss', 'content': 0.03844054788351059, 'timestamp': '2025-09-30 22:37:07.025542', 'step': 20128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.064545', 'step': 20128, 'epoch': 3} {'type': 'loss', 'content': 0.08853451907634735, 'timestamp': '2025-09-30 22:37:07.073481', 'step': 20129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.109556', 'step': 20129, 'epoch': 3} {'type': 'loss', 'content': 0.07700570672750473, 'timestamp': '2025-09-30 22:37:07.113738', 'step': 20130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.148158', 'step': 20130, 'epoch': 3} {'type': 'loss', 'content': 0.029666287824511528, 'timestamp': '2025-09-30 22:37:07.157280', 'step': 20131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.194888', 'step': 20131, 'epoch': 3} {'type': 'loss', 'content': 0.04866993427276611, 'timestamp': '2025-09-30 22:37:07.228860', 'step': 20132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.263511', 'step': 20132, 'epoch': 3} {'type': 'loss', 'content': 0.05259127542376518, 'timestamp': '2025-09-30 22:37:07.267432', 'step': 20133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.300484', 'step': 20133, 'epoch': 3} {'type': 'loss', 'content': 0.11895813792943954, 'timestamp': '2025-09-30 22:37:07.303595', 'step': 20134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.341146', 'step': 20134, 'epoch': 3} {'type': 'loss', 'content': 0.049418855458498, 'timestamp': '2025-09-30 22:37:07.346638', 'step': 20135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.387247', 'step': 20135, 'epoch': 3} {'type': 'loss', 'content': 0.06010947749018669, 'timestamp': '2025-09-30 22:37:07.422598', 'step': 20136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.456623', 'step': 20136, 'epoch': 3} {'type': 'loss', 'content': 0.06093659996986389, 'timestamp': '2025-09-30 22:37:07.482916', 'step': 20137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.518150', 'step': 20137, 'epoch': 3} {'type': 'loss', 'content': 0.057067714631557465, 'timestamp': '2025-09-30 22:37:07.520717', 'step': 20138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.554007', 'step': 20138, 'epoch': 3} {'type': 'loss', 'content': 0.07898300886154175, 'timestamp': '2025-09-30 22:37:07.557863', 'step': 20139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.591382', 'step': 20139, 'epoch': 3} {'type': 'loss', 'content': 0.03678388521075249, 'timestamp': '2025-09-30 22:37:07.616701', 'step': 20140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:07.647001', 'step': 20140, 'epoch': 3} {'type': 'loss', 'content': 0.10962405800819397, 'timestamp': '2025-09-30 22:37:07.651123', 'step': 20141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.682622', 'step': 20141, 'epoch': 3} {'type': 'loss', 'content': 0.046044331043958664, 'timestamp': '2025-09-30 22:37:07.696227', 'step': 20142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:07.727337', 'step': 20142, 'epoch': 3} {'type': 'loss', 'content': 0.014940919354557991, 'timestamp': '2025-09-30 22:37:07.730515', 'step': 20143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:07.765304', 'step': 20143, 'epoch': 3} {'type': 'loss', 'content': 0.08712948113679886, 'timestamp': '2025-09-30 22:37:07.789527', 'step': 20144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.840316', 'step': 20144, 'epoch': 3} {'type': 'loss', 'content': 0.03057798743247986, 'timestamp': '2025-09-30 22:37:07.844305', 'step': 20145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:07.879550', 'step': 20145, 'epoch': 3} {'type': 'loss', 'content': 0.010250838473439217, 'timestamp': '2025-09-30 22:37:07.882631', 'step': 20146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:07.918418', 'step': 20146, 'epoch': 3} {'type': 'loss', 'content': 0.08081572502851486, 'timestamp': '2025-09-30 22:37:07.921217', 'step': 20147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:07.954950', 'step': 20147, 'epoch': 3} {'type': 'loss', 'content': 0.023676851764321327, 'timestamp': '2025-09-30 22:37:07.979491', 'step': 20148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.009650', 'step': 20148, 'epoch': 3} {'type': 'loss', 'content': 0.07647686451673508, 'timestamp': '2025-09-30 22:37:08.013258', 'step': 20149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.044666', 'step': 20149, 'epoch': 3} {'type': 'loss', 'content': 0.09531613439321518, 'timestamp': '2025-09-30 22:37:08.047561', 'step': 20150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.079318', 'step': 20150, 'epoch': 3} {'type': 'loss', 'content': 0.014512062072753906, 'timestamp': '2025-09-30 22:37:08.083157', 'step': 20151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:08.114825', 'step': 20151, 'epoch': 3} {'type': 'loss', 'content': 0.007649456150829792, 'timestamp': '2025-09-30 22:37:08.147000', 'step': 20152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.178451', 'step': 20152, 'epoch': 3} {'type': 'loss', 'content': 0.018066320568323135, 'timestamp': '2025-09-30 22:37:08.182780', 'step': 20153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:08.215549', 'step': 20153, 'epoch': 3} {'type': 'loss', 'content': 0.10218589007854462, 'timestamp': '2025-09-30 22:37:08.218819', 'step': 20154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.251701', 'step': 20154, 'epoch': 3} {'type': 'loss', 'content': 0.004315465688705444, 'timestamp': '2025-09-30 22:37:08.254650', 'step': 20155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:08.286984', 'step': 20155, 'epoch': 3} {'type': 'loss', 'content': 0.0338483527302742, 'timestamp': '2025-09-30 22:37:08.314558', 'step': 20156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.347295', 'step': 20156, 'epoch': 3} {'type': 'loss', 'content': 0.07702558487653732, 'timestamp': '2025-09-30 22:37:08.351707', 'step': 20157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:08.381822', 'step': 20157, 'epoch': 3} {'type': 'loss', 'content': 0.07164312899112701, 'timestamp': '2025-09-30 22:37:08.387696', 'step': 20158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.420157', 'step': 20158, 'epoch': 3} {'type': 'loss', 'content': 0.07167152315378189, 'timestamp': '2025-09-30 22:37:08.423976', 'step': 20159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.457846', 'step': 20159, 'epoch': 3} {'type': 'loss', 'content': 0.0670371800661087, 'timestamp': '2025-09-30 22:37:08.483945', 'step': 20160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.515050', 'step': 20160, 'epoch': 3} {'type': 'loss', 'content': 0.04372713714838028, 'timestamp': '2025-09-30 22:37:08.519340', 'step': 20161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.551558', 'step': 20161, 'epoch': 3} {'type': 'loss', 'content': 0.10612993687391281, 'timestamp': '2025-09-30 22:37:08.564701', 'step': 20162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.596140', 'step': 20162, 'epoch': 3} {'type': 'loss', 'content': 0.045952826738357544, 'timestamp': '2025-09-30 22:37:08.598575', 'step': 20163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:08.628935', 'step': 20163, 'epoch': 3} {'type': 'loss', 'content': 0.08666805177927017, 'timestamp': '2025-09-30 22:37:08.653347', 'step': 20164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.686395', 'step': 20164, 'epoch': 3} {'type': 'loss', 'content': 0.01781843975186348, 'timestamp': '2025-09-30 22:37:08.689106', 'step': 20165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.719509', 'step': 20165, 'epoch': 3} {'type': 'loss', 'content': 0.05435490235686302, 'timestamp': '2025-09-30 22:37:08.723377', 'step': 20166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:08.755048', 'step': 20166, 'epoch': 3} {'type': 'loss', 'content': 0.0628715232014656, 'timestamp': '2025-09-30 22:37:08.759826', 'step': 20167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:08.800076', 'step': 20167, 'epoch': 3} {'type': 'loss', 'content': 0.07864955812692642, 'timestamp': '2025-09-30 22:37:08.826170', 'step': 20168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.864403', 'step': 20168, 'epoch': 3} {'type': 'loss', 'content': 0.01789592020213604, 'timestamp': '2025-09-30 22:37:08.874792', 'step': 20169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:08.910468', 'step': 20169, 'epoch': 3} {'type': 'loss', 'content': 0.0588359571993351, 'timestamp': '2025-09-30 22:37:08.914265', 'step': 20170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:08.946184', 'step': 20170, 'epoch': 3} {'type': 'loss', 'content': 0.03801652789115906, 'timestamp': '2025-09-30 22:37:08.950162', 'step': 20171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:08.983588', 'step': 20171, 'epoch': 3} {'type': 'loss', 'content': 0.05582188814878464, 'timestamp': '2025-09-30 22:37:09.010110', 'step': 20172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.041684', 'step': 20172, 'epoch': 3} {'type': 'loss', 'content': 0.051352065056562424, 'timestamp': '2025-09-30 22:37:09.044638', 'step': 20173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.078259', 'step': 20173, 'epoch': 3} {'type': 'loss', 'content': 0.09691684693098068, 'timestamp': '2025-09-30 22:37:09.082609', 'step': 20174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:09.116900', 'step': 20174, 'epoch': 3} {'type': 'loss', 'content': 0.07590624690055847, 'timestamp': '2025-09-30 22:37:09.119694', 'step': 20175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.156332', 'step': 20175, 'epoch': 3} {'type': 'loss', 'content': 0.04811332747340202, 'timestamp': '2025-09-30 22:37:09.181816', 'step': 20176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:09.214162', 'step': 20176, 'epoch': 3} {'type': 'loss', 'content': 0.06324026733636856, 'timestamp': '2025-09-30 22:37:09.221216', 'step': 20177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:09.259097', 'step': 20177, 'epoch': 3} {'type': 'loss', 'content': 0.06760694086551666, 'timestamp': '2025-09-30 22:37:09.264780', 'step': 20178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:09.298453', 'step': 20178, 'epoch': 3} {'type': 'loss', 'content': 0.04001892730593681, 'timestamp': '2025-09-30 22:37:09.302545', 'step': 20179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.335630', 'step': 20179, 'epoch': 3} {'type': 'loss', 'content': 0.03679204732179642, 'timestamp': '2025-09-30 22:37:09.360303', 'step': 20180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:09.391606', 'step': 20180, 'epoch': 3} {'type': 'loss', 'content': 0.014022515155375004, 'timestamp': '2025-09-30 22:37:09.393938', 'step': 20181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.424437', 'step': 20181, 'epoch': 3} {'type': 'loss', 'content': 0.03164590150117874, 'timestamp': '2025-09-30 22:37:09.429561', 'step': 20182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:09.462145', 'step': 20182, 'epoch': 3} {'type': 'loss', 'content': 0.03090411238372326, 'timestamp': '2025-09-30 22:37:09.465917', 'step': 20183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:09.499507', 'step': 20183, 'epoch': 3} {'type': 'loss', 'content': 0.04317860305309296, 'timestamp': '2025-09-30 22:37:09.526064', 'step': 20184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:09.560565', 'step': 20184, 'epoch': 3} {'type': 'loss', 'content': 0.08866873383522034, 'timestamp': '2025-09-30 22:37:09.565042', 'step': 20185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:09.599830', 'step': 20185, 'epoch': 3} {'type': 'loss', 'content': 0.06601555645465851, 'timestamp': '2025-09-30 22:37:09.603418', 'step': 20186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:09.634816', 'step': 20186, 'epoch': 3} {'type': 'loss', 'content': 0.06531570106744766, 'timestamp': '2025-09-30 22:37:09.637597', 'step': 20187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.669323', 'step': 20187, 'epoch': 3} {'type': 'loss', 'content': 0.012705161236226559, 'timestamp': '2025-09-30 22:37:09.694780', 'step': 20188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:09.725649', 'step': 20188, 'epoch': 3} {'type': 'loss', 'content': 0.07023382186889648, 'timestamp': '2025-09-30 22:37:09.732794', 'step': 20189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.770651', 'step': 20189, 'epoch': 3} {'type': 'loss', 'content': 0.09681584686040878, 'timestamp': '2025-09-30 22:37:09.774828', 'step': 20190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.805944', 'step': 20190, 'epoch': 3} {'type': 'loss', 'content': 0.08246006816625595, 'timestamp': '2025-09-30 22:37:09.809465', 'step': 20191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:09.840770', 'step': 20191, 'epoch': 3} {'type': 'loss', 'content': 0.01351112313568592, 'timestamp': '2025-09-30 22:37:09.866232', 'step': 20192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:09.905879', 'step': 20192, 'epoch': 3} {'type': 'loss', 'content': 0.1251572221517563, 'timestamp': '2025-09-30 22:37:09.914348', 'step': 20193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:09.947831', 'step': 20193, 'epoch': 3} {'type': 'loss', 'content': 0.09021537005901337, 'timestamp': '2025-09-30 22:37:09.956393', 'step': 20194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:09.988035', 'step': 20194, 'epoch': 3} {'type': 'loss', 'content': 0.09571673721075058, 'timestamp': '2025-09-30 22:37:09.992241', 'step': 20195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:10.023485', 'step': 20195, 'epoch': 3} {'type': 'loss', 'content': 0.02450835332274437, 'timestamp': '2025-09-30 22:37:10.049828', 'step': 20196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:10.080958', 'step': 20196, 'epoch': 3} {'type': 'loss', 'content': 0.02182457596063614, 'timestamp': '2025-09-30 22:37:10.083651', 'step': 20197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.116096', 'step': 20197, 'epoch': 3} {'type': 'loss', 'content': 0.09451449662446976, 'timestamp': '2025-09-30 22:37:10.119339', 'step': 20198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:10.153184', 'step': 20198, 'epoch': 3} {'type': 'loss', 'content': 0.20224301517009735, 'timestamp': '2025-09-30 22:37:10.157674', 'step': 20199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.194870', 'step': 20199, 'epoch': 3} {'type': 'loss', 'content': 0.06248055398464203, 'timestamp': '2025-09-30 22:37:10.222386', 'step': 20200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.253069', 'step': 20200, 'epoch': 3} {'type': 'loss', 'content': 0.1086641177535057, 'timestamp': '2025-09-30 22:37:10.256801', 'step': 20201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:10.288731', 'step': 20201, 'epoch': 3} {'type': 'loss', 'content': 0.10903702676296234, 'timestamp': '2025-09-30 22:37:10.292114', 'step': 20202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:10.324905', 'step': 20202, 'epoch': 3} {'type': 'loss', 'content': 0.06174749881029129, 'timestamp': '2025-09-30 22:37:10.327641', 'step': 20203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:10.359258', 'step': 20203, 'epoch': 3} {'type': 'loss', 'content': 0.056767530739307404, 'timestamp': '2025-09-30 22:37:10.386074', 'step': 20204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.421891', 'step': 20204, 'epoch': 3} {'type': 'loss', 'content': 0.06659003347158432, 'timestamp': '2025-09-30 22:37:10.426972', 'step': 20205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:10.471327', 'step': 20205, 'epoch': 3} {'type': 'loss', 'content': 0.11573054641485214, 'timestamp': '2025-09-30 22:37:10.480478', 'step': 20206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:10.519142', 'step': 20206, 'epoch': 3} {'type': 'loss', 'content': 0.04482215642929077, 'timestamp': '2025-09-30 22:37:10.524736', 'step': 20207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.561058', 'step': 20207, 'epoch': 3} {'type': 'loss', 'content': 0.03808176890015602, 'timestamp': '2025-09-30 22:37:10.587298', 'step': 20208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:10.617960', 'step': 20208, 'epoch': 3} {'type': 'loss', 'content': 0.0640515387058258, 'timestamp': '2025-09-30 22:37:10.623607', 'step': 20209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:10.665625', 'step': 20209, 'epoch': 3} {'type': 'loss', 'content': 0.0628071203827858, 'timestamp': '2025-09-30 22:37:10.671008', 'step': 20210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:10.704789', 'step': 20210, 'epoch': 3} {'type': 'loss', 'content': 0.10186611115932465, 'timestamp': '2025-09-30 22:37:10.707234', 'step': 20211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:10.738813', 'step': 20211, 'epoch': 3} {'type': 'loss', 'content': 0.06944870203733444, 'timestamp': '2025-09-30 22:37:10.764405', 'step': 20212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:10.796530', 'step': 20212, 'epoch': 3} {'type': 'loss', 'content': 0.16353286802768707, 'timestamp': '2025-09-30 22:37:10.800478', 'step': 20213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.832020', 'step': 20213, 'epoch': 3} {'type': 'loss', 'content': 0.044766999781131744, 'timestamp': '2025-09-30 22:37:10.835245', 'step': 20214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:10.866895', 'step': 20214, 'epoch': 3} {'type': 'loss', 'content': 0.04771876707673073, 'timestamp': '2025-09-30 22:37:10.872329', 'step': 20215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:10.907167', 'step': 20215, 'epoch': 3} {'type': 'loss', 'content': 0.07434255629777908, 'timestamp': '2025-09-30 22:37:10.930995', 'step': 20216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:10.963809', 'step': 20216, 'epoch': 3} {'type': 'loss', 'content': 0.11758261173963547, 'timestamp': '2025-09-30 22:37:10.968668', 'step': 20217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.013315', 'step': 20217, 'epoch': 3} {'type': 'loss', 'content': 0.09205713123083115, 'timestamp': '2025-09-30 22:37:11.027731', 'step': 20218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:11.064949', 'step': 20218, 'epoch': 3} {'type': 'loss', 'content': 0.08127988874912262, 'timestamp': '2025-09-30 22:37:11.075935', 'step': 20219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:11.107711', 'step': 20219, 'epoch': 3} {'type': 'loss', 'content': 0.09740161895751953, 'timestamp': '2025-09-30 22:37:11.134990', 'step': 20220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.165046', 'step': 20220, 'epoch': 3} {'type': 'loss', 'content': 0.07256732881069183, 'timestamp': '2025-09-30 22:37:11.167894', 'step': 20221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:11.198940', 'step': 20221, 'epoch': 3} {'type': 'loss', 'content': 0.057439692318439484, 'timestamp': '2025-09-30 22:37:11.206104', 'step': 20222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.240379', 'step': 20222, 'epoch': 3} {'type': 'loss', 'content': 0.1405036449432373, 'timestamp': '2025-09-30 22:37:11.243087', 'step': 20223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.273319', 'step': 20223, 'epoch': 3} {'type': 'loss', 'content': 0.040242936462163925, 'timestamp': '2025-09-30 22:37:11.299742', 'step': 20224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.334682', 'step': 20224, 'epoch': 3} {'type': 'loss', 'content': 0.02947293408215046, 'timestamp': '2025-09-30 22:37:11.337719', 'step': 20225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:11.369637', 'step': 20225, 'epoch': 3} {'type': 'loss', 'content': 0.08506274968385696, 'timestamp': '2025-09-30 22:37:11.373229', 'step': 20226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.409500', 'step': 20226, 'epoch': 3} {'type': 'loss', 'content': 0.04108060896396637, 'timestamp': '2025-09-30 22:37:11.413845', 'step': 20227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:11.445315', 'step': 20227, 'epoch': 3} {'type': 'loss', 'content': 0.08231077343225479, 'timestamp': '2025-09-30 22:37:11.483433', 'step': 20228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.515413', 'step': 20228, 'epoch': 3} {'type': 'loss', 'content': 0.10175460577011108, 'timestamp': '2025-09-30 22:37:11.518266', 'step': 20229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:11.559504', 'step': 20229, 'epoch': 3} {'type': 'loss', 'content': 0.08138139545917511, 'timestamp': '2025-09-30 22:37:11.565074', 'step': 20230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.596024', 'step': 20230, 'epoch': 3} {'type': 'loss', 'content': 0.06298492848873138, 'timestamp': '2025-09-30 22:37:11.601368', 'step': 20231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:11.635159', 'step': 20231, 'epoch': 3} {'type': 'loss', 'content': 0.06667033582925797, 'timestamp': '2025-09-30 22:37:11.672727', 'step': 20232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:11.707274', 'step': 20232, 'epoch': 3} {'type': 'loss', 'content': 0.11664730310440063, 'timestamp': '2025-09-30 22:37:11.712636', 'step': 20233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.745260', 'step': 20233, 'epoch': 3} {'type': 'loss', 'content': 0.17713689804077148, 'timestamp': '2025-09-30 22:37:11.751056', 'step': 20234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:11.799424', 'step': 20234, 'epoch': 3} {'type': 'loss', 'content': 0.08652012050151825, 'timestamp': '2025-09-30 22:37:11.817004', 'step': 20235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:11.862325', 'step': 20235, 'epoch': 3} {'type': 'loss', 'content': 0.09709766507148743, 'timestamp': '2025-09-30 22:37:11.905251', 'step': 20236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:11.938167', 'step': 20236, 'epoch': 3} {'type': 'loss', 'content': 0.029365437105298042, 'timestamp': '2025-09-30 22:37:11.942147', 'step': 20237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:11.979090', 'step': 20237, 'epoch': 3} {'type': 'loss', 'content': 0.041603535413742065, 'timestamp': '2025-09-30 22:37:11.983809', 'step': 20238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.027584', 'step': 20238, 'epoch': 3} {'type': 'loss', 'content': 0.05479489266872406, 'timestamp': '2025-09-30 22:37:12.032379', 'step': 20239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.064506', 'step': 20239, 'epoch': 3} {'type': 'loss', 'content': 0.10967035591602325, 'timestamp': '2025-09-30 22:37:12.102857', 'step': 20240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:12.147382', 'step': 20240, 'epoch': 3} {'type': 'loss', 'content': 0.0640176385641098, 'timestamp': '2025-09-30 22:37:12.155726', 'step': 20241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:12.191801', 'step': 20241, 'epoch': 3} {'type': 'loss', 'content': 0.03964238613843918, 'timestamp': '2025-09-30 22:37:12.197570', 'step': 20242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:12.230758', 'step': 20242, 'epoch': 3} {'type': 'loss', 'content': 0.08759669214487076, 'timestamp': '2025-09-30 22:37:12.234863', 'step': 20243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:12.267806', 'step': 20243, 'epoch': 3} {'type': 'loss', 'content': 0.13651278614997864, 'timestamp': '2025-09-30 22:37:12.293899', 'step': 20244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:12.326465', 'step': 20244, 'epoch': 3} {'type': 'loss', 'content': 0.02672729454934597, 'timestamp': '2025-09-30 22:37:12.329696', 'step': 20245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.361800', 'step': 20245, 'epoch': 3} {'type': 'loss', 'content': 0.04583130404353142, 'timestamp': '2025-09-30 22:37:12.365628', 'step': 20246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.397136', 'step': 20246, 'epoch': 3} {'type': 'loss', 'content': 0.04875973239541054, 'timestamp': '2025-09-30 22:37:12.410763', 'step': 20247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.449458', 'step': 20247, 'epoch': 3} {'type': 'loss', 'content': 0.03797737881541252, 'timestamp': '2025-09-30 22:37:12.474507', 'step': 20248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:12.506086', 'step': 20248, 'epoch': 3} {'type': 'loss', 'content': 0.0458591990172863, 'timestamp': '2025-09-30 22:37:12.509990', 'step': 20249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:37:12.540781', 'step': 20249, 'epoch': 3} {'type': 'loss', 'content': 0.05460543930530548, 'timestamp': '2025-09-30 22:37:12.545713', 'step': 20250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.577313', 'step': 20250, 'epoch': 3} {'type': 'loss', 'content': 0.07760550081729889, 'timestamp': '2025-09-30 22:37:12.581452', 'step': 20251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:12.625930', 'step': 20251, 'epoch': 3} {'type': 'loss', 'content': 0.09094220399856567, 'timestamp': '2025-09-30 22:37:12.654792', 'step': 20252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.692072', 'step': 20252, 'epoch': 3} {'type': 'loss', 'content': 0.07475452125072479, 'timestamp': '2025-09-30 22:37:12.697043', 'step': 20253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:12.728260', 'step': 20253, 'epoch': 3} {'type': 'loss', 'content': 0.009552794508635998, 'timestamp': '2025-09-30 22:37:12.742139', 'step': 20254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.787269', 'step': 20254, 'epoch': 3} {'type': 'loss', 'content': 0.131353497505188, 'timestamp': '2025-09-30 22:37:12.791545', 'step': 20255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:12.824940', 'step': 20255, 'epoch': 3} {'type': 'loss', 'content': 0.0904417335987091, 'timestamp': '2025-09-30 22:37:12.851399', 'step': 20256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.883627', 'step': 20256, 'epoch': 3} {'type': 'loss', 'content': 0.03651362285017967, 'timestamp': '2025-09-30 22:37:12.890760', 'step': 20257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:12.922159', 'step': 20257, 'epoch': 3} {'type': 'loss', 'content': 0.06539086252450943, 'timestamp': '2025-09-30 22:37:12.925811', 'step': 20258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:12.958499', 'step': 20258, 'epoch': 3} {'type': 'loss', 'content': 0.09176793694496155, 'timestamp': '2025-09-30 22:37:12.962784', 'step': 20259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:12.995543', 'step': 20259, 'epoch': 3} {'type': 'loss', 'content': 0.03862912952899933, 'timestamp': '2025-09-30 22:37:13.022251', 'step': 20260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.052891', 'step': 20260, 'epoch': 3} {'type': 'loss', 'content': 0.021177759394049644, 'timestamp': '2025-09-30 22:37:13.055868', 'step': 20261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:13.087684', 'step': 20261, 'epoch': 3} {'type': 'loss', 'content': 0.0016738054109737277, 'timestamp': '2025-09-30 22:37:13.091810', 'step': 20262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:13.123747', 'step': 20262, 'epoch': 3} {'type': 'loss', 'content': 0.07083538919687271, 'timestamp': '2025-09-30 22:37:13.126642', 'step': 20263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:13.165296', 'step': 20263, 'epoch': 3} {'type': 'loss', 'content': 0.1318550556898117, 'timestamp': '2025-09-30 22:37:13.195796', 'step': 20264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:13.230067', 'step': 20264, 'epoch': 3} {'type': 'loss', 'content': 0.06144660338759422, 'timestamp': '2025-09-30 22:37:13.238865', 'step': 20265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:13.271283', 'step': 20265, 'epoch': 3} {'type': 'loss', 'content': 0.062372513115406036, 'timestamp': '2025-09-30 22:37:13.275021', 'step': 20266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:13.307777', 'step': 20266, 'epoch': 3} {'type': 'loss', 'content': 0.09492339938879013, 'timestamp': '2025-09-30 22:37:13.320875', 'step': 20267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.353955', 'step': 20267, 'epoch': 3} {'type': 'loss', 'content': 0.0882912203669548, 'timestamp': '2025-09-30 22:37:13.386114', 'step': 20268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:13.433729', 'step': 20268, 'epoch': 3} {'type': 'loss', 'content': 0.04831339791417122, 'timestamp': '2025-09-30 22:37:13.437777', 'step': 20269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:13.484260', 'step': 20269, 'epoch': 3} {'type': 'loss', 'content': 0.0871744230389595, 'timestamp': '2025-09-30 22:37:13.495508', 'step': 20270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:13.531329', 'step': 20270, 'epoch': 3} {'type': 'loss', 'content': 0.07169336825609207, 'timestamp': '2025-09-30 22:37:13.538716', 'step': 20271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.574926', 'step': 20271, 'epoch': 3} {'type': 'loss', 'content': 0.07272568345069885, 'timestamp': '2025-09-30 22:37:13.603534', 'step': 20272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.635979', 'step': 20272, 'epoch': 3} {'type': 'loss', 'content': 0.038779694586992264, 'timestamp': '2025-09-30 22:37:13.640850', 'step': 20273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:13.674420', 'step': 20273, 'epoch': 3} {'type': 'loss', 'content': 0.06733846664428711, 'timestamp': '2025-09-30 22:37:13.680254', 'step': 20274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:13.716462', 'step': 20274, 'epoch': 3} {'type': 'loss', 'content': 0.1451745182275772, 'timestamp': '2025-09-30 22:37:13.723603', 'step': 20275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:13.759373', 'step': 20275, 'epoch': 3} {'type': 'loss', 'content': 0.06877829879522324, 'timestamp': '2025-09-30 22:37:13.787383', 'step': 20276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.819684', 'step': 20276, 'epoch': 3} {'type': 'loss', 'content': 0.08484900742769241, 'timestamp': '2025-09-30 22:37:13.825165', 'step': 20277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:13.857362', 'step': 20277, 'epoch': 3} {'type': 'loss', 'content': 0.05349460244178772, 'timestamp': '2025-09-30 22:37:13.860498', 'step': 20278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.894131', 'step': 20278, 'epoch': 3} {'type': 'loss', 'content': 0.03450920060276985, 'timestamp': '2025-09-30 22:37:13.897978', 'step': 20279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:13.933961', 'step': 20279, 'epoch': 3} {'type': 'loss', 'content': 0.06991922110319138, 'timestamp': '2025-09-30 22:37:13.963619', 'step': 20280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:13.999780', 'step': 20280, 'epoch': 3} {'type': 'loss', 'content': 0.0853160172700882, 'timestamp': '2025-09-30 22:37:14.002744', 'step': 20281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.039425', 'step': 20281, 'epoch': 3} {'type': 'loss', 'content': 0.055545154958963394, 'timestamp': '2025-09-30 22:37:14.044119', 'step': 20282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.075589', 'step': 20282, 'epoch': 3} {'type': 'loss', 'content': 0.07686812430620193, 'timestamp': '2025-09-30 22:37:14.078983', 'step': 20283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.110064', 'step': 20283, 'epoch': 3} {'type': 'loss', 'content': 0.08832519501447678, 'timestamp': '2025-09-30 22:37:14.136108', 'step': 20284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:14.170619', 'step': 20284, 'epoch': 3} {'type': 'loss', 'content': 0.06650129705667496, 'timestamp': '2025-09-30 22:37:14.175141', 'step': 20285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.205156', 'step': 20285, 'epoch': 3} {'type': 'loss', 'content': 0.09550514072179794, 'timestamp': '2025-09-30 22:37:14.221546', 'step': 20286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.258429', 'step': 20286, 'epoch': 3} {'type': 'loss', 'content': 0.05567726492881775, 'timestamp': '2025-09-30 22:37:14.262987', 'step': 20287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.293408', 'step': 20287, 'epoch': 3} {'type': 'loss', 'content': 0.029166782274842262, 'timestamp': '2025-09-30 22:37:14.317708', 'step': 20288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.352865', 'step': 20288, 'epoch': 3} {'type': 'loss', 'content': 0.09884971380233765, 'timestamp': '2025-09-30 22:37:14.363436', 'step': 20289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.397263', 'step': 20289, 'epoch': 3} {'type': 'loss', 'content': 0.05548613891005516, 'timestamp': '2025-09-30 22:37:14.402052', 'step': 20290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:14.434391', 'step': 20290, 'epoch': 3} {'type': 'loss', 'content': 0.056119631975889206, 'timestamp': '2025-09-30 22:37:14.438634', 'step': 20291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.470719', 'step': 20291, 'epoch': 3} {'type': 'loss', 'content': 0.09896641224622726, 'timestamp': '2025-09-30 22:37:14.496408', 'step': 20292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:14.530247', 'step': 20292, 'epoch': 3} {'type': 'loss', 'content': 0.054061077535152435, 'timestamp': '2025-09-30 22:37:14.540269', 'step': 20293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.571583', 'step': 20293, 'epoch': 3} {'type': 'loss', 'content': 0.04832693934440613, 'timestamp': '2025-09-30 22:37:14.575509', 'step': 20294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:14.606480', 'step': 20294, 'epoch': 3} {'type': 'loss', 'content': 0.030359415337443352, 'timestamp': '2025-09-30 22:37:14.611961', 'step': 20295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.647855', 'step': 20295, 'epoch': 3} {'type': 'loss', 'content': 0.03377072513103485, 'timestamp': '2025-09-30 22:37:14.674286', 'step': 20296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.705583', 'step': 20296, 'epoch': 3} {'type': 'loss', 'content': 0.021738732233643532, 'timestamp': '2025-09-30 22:37:14.714119', 'step': 20297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.748657', 'step': 20297, 'epoch': 3} {'type': 'loss', 'content': 0.053351353853940964, 'timestamp': '2025-09-30 22:37:14.752787', 'step': 20298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.784002', 'step': 20298, 'epoch': 3} {'type': 'loss', 'content': 0.06759603321552277, 'timestamp': '2025-09-30 22:37:14.786836', 'step': 20299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.825864', 'step': 20299, 'epoch': 3} {'type': 'loss', 'content': 0.027859674766659737, 'timestamp': '2025-09-30 22:37:14.852386', 'step': 20300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:14.885575', 'step': 20300, 'epoch': 3} {'type': 'loss', 'content': 0.05063170567154884, 'timestamp': '2025-09-30 22:37:14.888696', 'step': 20301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:14.933148', 'step': 20301, 'epoch': 3} {'type': 'loss', 'content': 0.05058273300528526, 'timestamp': '2025-09-30 22:37:14.938271', 'step': 20302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:14.973173', 'step': 20302, 'epoch': 3} {'type': 'loss', 'content': 0.015074574388563633, 'timestamp': '2025-09-30 22:37:14.976924', 'step': 20303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.012338', 'step': 20303, 'epoch': 3} {'type': 'loss', 'content': 0.043966639786958694, 'timestamp': '2025-09-30 22:37:15.038753', 'step': 20304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:15.073270', 'step': 20304, 'epoch': 3} {'type': 'loss', 'content': 0.059536416083574295, 'timestamp': '2025-09-30 22:37:15.077362', 'step': 20305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.108424', 'step': 20305, 'epoch': 3} {'type': 'loss', 'content': 0.0042277793399989605, 'timestamp': '2025-09-30 22:37:15.111458', 'step': 20306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.147239', 'step': 20306, 'epoch': 3} {'type': 'loss', 'content': 0.06673134118318558, 'timestamp': '2025-09-30 22:37:15.151431', 'step': 20307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:15.183229', 'step': 20307, 'epoch': 3} {'type': 'loss', 'content': 0.10974754393100739, 'timestamp': '2025-09-30 22:37:15.207883', 'step': 20308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:15.243202', 'step': 20308, 'epoch': 3} {'type': 'loss', 'content': 0.049591969698667526, 'timestamp': '2025-09-30 22:37:15.249380', 'step': 20309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.288919', 'step': 20309, 'epoch': 3} {'type': 'loss', 'content': 0.13376466929912567, 'timestamp': '2025-09-30 22:37:15.293119', 'step': 20310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:15.324706', 'step': 20310, 'epoch': 3} {'type': 'loss', 'content': 0.03153548389673233, 'timestamp': '2025-09-30 22:37:15.333762', 'step': 20311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.370269', 'step': 20311, 'epoch': 3} {'type': 'loss', 'content': 0.09836067259311676, 'timestamp': '2025-09-30 22:37:15.401989', 'step': 20312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:15.435750', 'step': 20312, 'epoch': 3} {'type': 'loss', 'content': 0.041157376021146774, 'timestamp': '2025-09-30 22:37:15.441344', 'step': 20313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.474863', 'step': 20313, 'epoch': 3} {'type': 'loss', 'content': 0.07130996882915497, 'timestamp': '2025-09-30 22:37:15.482372', 'step': 20314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.516319', 'step': 20314, 'epoch': 3} {'type': 'loss', 'content': 0.08269548416137695, 'timestamp': '2025-09-30 22:37:15.519867', 'step': 20315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:15.554318', 'step': 20315, 'epoch': 3} {'type': 'loss', 'content': 0.0685959979891777, 'timestamp': '2025-09-30 22:37:15.578651', 'step': 20316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.610674', 'step': 20316, 'epoch': 3} {'type': 'loss', 'content': 0.06764538586139679, 'timestamp': '2025-09-30 22:37:15.620076', 'step': 20317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.651013', 'step': 20317, 'epoch': 3} {'type': 'loss', 'content': 0.14513540267944336, 'timestamp': '2025-09-30 22:37:15.656458', 'step': 20318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.693131', 'step': 20318, 'epoch': 3} {'type': 'loss', 'content': 0.09800634533166885, 'timestamp': '2025-09-30 22:37:15.702339', 'step': 20319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:15.736482', 'step': 20319, 'epoch': 3} {'type': 'loss', 'content': 0.05244975909590721, 'timestamp': '2025-09-30 22:37:15.761861', 'step': 20320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.794590', 'step': 20320, 'epoch': 3} {'type': 'loss', 'content': 0.05107906460762024, 'timestamp': '2025-09-30 22:37:15.798118', 'step': 20321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.833160', 'step': 20321, 'epoch': 3} {'type': 'loss', 'content': 0.046601392328739166, 'timestamp': '2025-09-30 22:37:15.837668', 'step': 20322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:15.869857', 'step': 20322, 'epoch': 3} {'type': 'loss', 'content': 0.054452430456876755, 'timestamp': '2025-09-30 22:37:15.875354', 'step': 20323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:15.908582', 'step': 20323, 'epoch': 3} {'type': 'loss', 'content': 0.016536185517907143, 'timestamp': '2025-09-30 22:37:15.937123', 'step': 20324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:15.968604', 'step': 20324, 'epoch': 3} {'type': 'loss', 'content': 0.017876017838716507, 'timestamp': '2025-09-30 22:37:15.971527', 'step': 20325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.003570', 'step': 20325, 'epoch': 3} {'type': 'loss', 'content': 0.054631829261779785, 'timestamp': '2025-09-30 22:37:16.018875', 'step': 20326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.053476', 'step': 20326, 'epoch': 3} {'type': 'loss', 'content': 0.04158948361873627, 'timestamp': '2025-09-30 22:37:16.056472', 'step': 20327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:16.089691', 'step': 20327, 'epoch': 3} {'type': 'loss', 'content': 0.05392904207110405, 'timestamp': '2025-09-30 22:37:16.115044', 'step': 20328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:16.147327', 'step': 20328, 'epoch': 3} {'type': 'loss', 'content': 0.042596977204084396, 'timestamp': '2025-09-30 22:37:16.159375', 'step': 20329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:16.192058', 'step': 20329, 'epoch': 3} {'type': 'loss', 'content': 0.10448776930570602, 'timestamp': '2025-09-30 22:37:16.201863', 'step': 20330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.234075', 'step': 20330, 'epoch': 3} {'type': 'loss', 'content': 0.07979495078325272, 'timestamp': '2025-09-30 22:37:16.244081', 'step': 20331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:16.275491', 'step': 20331, 'epoch': 3} {'type': 'loss', 'content': 0.1049424335360527, 'timestamp': '2025-09-30 22:37:16.301284', 'step': 20332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.340162', 'step': 20332, 'epoch': 3} {'type': 'loss', 'content': 0.03991207852959633, 'timestamp': '2025-09-30 22:37:16.352546', 'step': 20333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:16.386452', 'step': 20333, 'epoch': 3} {'type': 'loss', 'content': 0.03737819194793701, 'timestamp': '2025-09-30 22:37:16.390299', 'step': 20334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.429551', 'step': 20334, 'epoch': 3} {'type': 'loss', 'content': 0.0860745906829834, 'timestamp': '2025-09-30 22:37:16.433972', 'step': 20335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.468397', 'step': 20335, 'epoch': 3} {'type': 'loss', 'content': 0.13758869469165802, 'timestamp': '2025-09-30 22:37:16.493940', 'step': 20336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:16.529596', 'step': 20336, 'epoch': 3} {'type': 'loss', 'content': 0.05466997250914574, 'timestamp': '2025-09-30 22:37:16.534361', 'step': 20337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:16.574424', 'step': 20337, 'epoch': 3} {'type': 'loss', 'content': 0.05784545838832855, 'timestamp': '2025-09-30 22:37:16.579946', 'step': 20338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.619693', 'step': 20338, 'epoch': 3} {'type': 'loss', 'content': 0.017947526648640633, 'timestamp': '2025-09-30 22:37:16.629650', 'step': 20339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:16.660955', 'step': 20339, 'epoch': 3} {'type': 'loss', 'content': 0.013634852133691311, 'timestamp': '2025-09-30 22:37:16.694179', 'step': 20340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.726024', 'step': 20340, 'epoch': 3} {'type': 'loss', 'content': 0.06796440482139587, 'timestamp': '2025-09-30 22:37:16.731947', 'step': 20341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.763162', 'step': 20341, 'epoch': 3} {'type': 'loss', 'content': 0.0989745482802391, 'timestamp': '2025-09-30 22:37:16.767710', 'step': 20342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.812320', 'step': 20342, 'epoch': 3} {'type': 'loss', 'content': 0.01002084743231535, 'timestamp': '2025-09-30 22:37:16.823673', 'step': 20343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:16.858510', 'step': 20343, 'epoch': 3} {'type': 'loss', 'content': 0.07201375812292099, 'timestamp': '2025-09-30 22:37:16.887409', 'step': 20344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:16.926712', 'step': 20344, 'epoch': 3} {'type': 'loss', 'content': 0.04813418537378311, 'timestamp': '2025-09-30 22:37:16.930625', 'step': 20345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:16.964032', 'step': 20345, 'epoch': 3} {'type': 'loss', 'content': 0.040864065289497375, 'timestamp': '2025-09-30 22:37:16.975497', 'step': 20346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.010478', 'step': 20346, 'epoch': 3} {'type': 'loss', 'content': 0.05496077612042427, 'timestamp': '2025-09-30 22:37:17.017751', 'step': 20347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.054004', 'step': 20347, 'epoch': 3} {'type': 'loss', 'content': 0.12458393722772598, 'timestamp': '2025-09-30 22:37:17.084665', 'step': 20348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.116352', 'step': 20348, 'epoch': 3} {'type': 'loss', 'content': 0.0223868228495121, 'timestamp': '2025-09-30 22:37:17.120320', 'step': 20349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.158167', 'step': 20349, 'epoch': 3} {'type': 'loss', 'content': 0.03484226018190384, 'timestamp': '2025-09-30 22:37:17.164974', 'step': 20350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.198165', 'step': 20350, 'epoch': 3} {'type': 'loss', 'content': 0.03996371477842331, 'timestamp': '2025-09-30 22:37:17.203940', 'step': 20351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.237276', 'step': 20351, 'epoch': 3} {'type': 'loss', 'content': 0.07839938998222351, 'timestamp': '2025-09-30 22:37:17.266308', 'step': 20352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.296603', 'step': 20352, 'epoch': 3} {'type': 'loss', 'content': 0.09050536155700684, 'timestamp': '2025-09-30 22:37:17.299895', 'step': 20353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.332963', 'step': 20353, 'epoch': 3} {'type': 'loss', 'content': 0.07001959532499313, 'timestamp': '2025-09-30 22:37:17.340253', 'step': 20354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.373517', 'step': 20354, 'epoch': 3} {'type': 'loss', 'content': 0.05827143043279648, 'timestamp': '2025-09-30 22:37:17.381432', 'step': 20355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.418323', 'step': 20355, 'epoch': 3} {'type': 'loss', 'content': 0.07991743087768555, 'timestamp': '2025-09-30 22:37:17.447462', 'step': 20356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.488668', 'step': 20356, 'epoch': 3} {'type': 'loss', 'content': 0.03816582262516022, 'timestamp': '2025-09-30 22:37:17.496427', 'step': 20357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.537103', 'step': 20357, 'epoch': 3} {'type': 'loss', 'content': 0.06696432083845139, 'timestamp': '2025-09-30 22:37:17.540891', 'step': 20358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.571913', 'step': 20358, 'epoch': 3} {'type': 'loss', 'content': 0.07566455006599426, 'timestamp': '2025-09-30 22:37:17.577793', 'step': 20359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.610984', 'step': 20359, 'epoch': 3} {'type': 'loss', 'content': 0.047458164393901825, 'timestamp': '2025-09-30 22:37:17.636363', 'step': 20360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.667751', 'step': 20360, 'epoch': 3} {'type': 'loss', 'content': 0.17223812639713287, 'timestamp': '2025-09-30 22:37:17.672974', 'step': 20361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.706774', 'step': 20361, 'epoch': 3} {'type': 'loss', 'content': 0.02168545313179493, 'timestamp': '2025-09-30 22:37:17.713847', 'step': 20362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.748651', 'step': 20362, 'epoch': 3} {'type': 'loss', 'content': 0.03628082945942879, 'timestamp': '2025-09-30 22:37:17.754017', 'step': 20363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.784487', 'step': 20363, 'epoch': 3} {'type': 'loss', 'content': 0.05637683719396591, 'timestamp': '2025-09-30 22:37:17.809591', 'step': 20364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:17.842841', 'step': 20364, 'epoch': 3} {'type': 'loss', 'content': 0.021919971331954002, 'timestamp': '2025-09-30 22:37:17.849273', 'step': 20365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.882416', 'step': 20365, 'epoch': 3} {'type': 'loss', 'content': 0.10055655986070633, 'timestamp': '2025-09-30 22:37:17.888410', 'step': 20366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:17.924693', 'step': 20366, 'epoch': 3} {'type': 'loss', 'content': 0.01430110540241003, 'timestamp': '2025-09-30 22:37:17.927400', 'step': 20367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:17.962588', 'step': 20367, 'epoch': 3} {'type': 'loss', 'content': 0.07362000644207001, 'timestamp': '2025-09-30 22:37:17.986942', 'step': 20368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.019580', 'step': 20368, 'epoch': 3} {'type': 'loss', 'content': 0.07229907810688019, 'timestamp': '2025-09-30 22:37:18.026209', 'step': 20369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:18.059708', 'step': 20369, 'epoch': 3} {'type': 'loss', 'content': 0.0381135493516922, 'timestamp': '2025-09-30 22:37:18.062567', 'step': 20370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.093720', 'step': 20370, 'epoch': 3} {'type': 'loss', 'content': 0.14673550426959991, 'timestamp': '2025-09-30 22:37:18.097155', 'step': 20371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.130960', 'step': 20371, 'epoch': 3} {'type': 'loss', 'content': 0.04017876088619232, 'timestamp': '2025-09-30 22:37:18.157135', 'step': 20372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:18.189640', 'step': 20372, 'epoch': 3} {'type': 'loss', 'content': 0.05243247374892235, 'timestamp': '2025-09-30 22:37:18.194316', 'step': 20373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.231540', 'step': 20373, 'epoch': 3} {'type': 'loss', 'content': 0.1220279186964035, 'timestamp': '2025-09-30 22:37:18.236898', 'step': 20374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.269915', 'step': 20374, 'epoch': 3} {'type': 'loss', 'content': 0.035033609718084335, 'timestamp': '2025-09-30 22:37:18.274486', 'step': 20375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.306046', 'step': 20375, 'epoch': 3} {'type': 'loss', 'content': 0.08800520747900009, 'timestamp': '2025-09-30 22:37:18.340034', 'step': 20376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:18.376821', 'step': 20376, 'epoch': 3} {'type': 'loss', 'content': 0.04923267289996147, 'timestamp': '2025-09-30 22:37:18.379816', 'step': 20377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:18.420145', 'step': 20377, 'epoch': 3} {'type': 'loss', 'content': 0.1033342257142067, 'timestamp': '2025-09-30 22:37:18.432353', 'step': 20378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.464585', 'step': 20378, 'epoch': 3} {'type': 'loss', 'content': 0.03656427562236786, 'timestamp': '2025-09-30 22:37:18.469388', 'step': 20379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.506089', 'step': 20379, 'epoch': 3} {'type': 'loss', 'content': 0.09320639073848724, 'timestamp': '2025-09-30 22:37:18.530453', 'step': 20380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:18.564395', 'step': 20380, 'epoch': 3} {'type': 'loss', 'content': 0.0211452879011631, 'timestamp': '2025-09-30 22:37:18.568832', 'step': 20381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.602435', 'step': 20381, 'epoch': 3} {'type': 'loss', 'content': 0.05311882123351097, 'timestamp': '2025-09-30 22:37:18.611188', 'step': 20382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.653442', 'step': 20382, 'epoch': 3} {'type': 'loss', 'content': 0.03910059109330177, 'timestamp': '2025-09-30 22:37:18.656184', 'step': 20383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:18.691625', 'step': 20383, 'epoch': 3} {'type': 'loss', 'content': 0.024712733924388885, 'timestamp': '2025-09-30 22:37:18.718052', 'step': 20384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.755864', 'step': 20384, 'epoch': 3} {'type': 'loss', 'content': 0.02007223479449749, 'timestamp': '2025-09-30 22:37:18.761819', 'step': 20385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.796823', 'step': 20385, 'epoch': 3} {'type': 'loss', 'content': 0.08357851207256317, 'timestamp': '2025-09-30 22:37:18.803511', 'step': 20386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:18.837104', 'step': 20386, 'epoch': 3} {'type': 'loss', 'content': 0.07014356553554535, 'timestamp': '2025-09-30 22:37:18.841654', 'step': 20387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.873393', 'step': 20387, 'epoch': 3} {'type': 'loss', 'content': 0.15384815633296967, 'timestamp': '2025-09-30 22:37:18.897528', 'step': 20388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:18.931498', 'step': 20388, 'epoch': 3} {'type': 'loss', 'content': 0.018298117443919182, 'timestamp': '2025-09-30 22:37:18.934302', 'step': 20389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:18.970890', 'step': 20389, 'epoch': 3} {'type': 'loss', 'content': 0.10529725253582001, 'timestamp': '2025-09-30 22:37:18.975328', 'step': 20390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.006787', 'step': 20390, 'epoch': 3} {'type': 'loss', 'content': 0.1049310639500618, 'timestamp': '2025-09-30 22:37:19.010295', 'step': 20391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:19.041509', 'step': 20391, 'epoch': 3} {'type': 'loss', 'content': 0.02208307385444641, 'timestamp': '2025-09-30 22:37:19.074974', 'step': 20392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.107961', 'step': 20392, 'epoch': 3} {'type': 'loss', 'content': 0.04927714541554451, 'timestamp': '2025-09-30 22:37:19.110614', 'step': 20393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:19.141853', 'step': 20393, 'epoch': 3} {'type': 'loss', 'content': 0.057600826025009155, 'timestamp': '2025-09-30 22:37:19.154161', 'step': 20394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.194264', 'step': 20394, 'epoch': 3} {'type': 'loss', 'content': 0.0508410707116127, 'timestamp': '2025-09-30 22:37:19.198418', 'step': 20395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.234983', 'step': 20395, 'epoch': 3} {'type': 'loss', 'content': 0.03722253441810608, 'timestamp': '2025-09-30 22:37:19.259818', 'step': 20396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.292910', 'step': 20396, 'epoch': 3} {'type': 'loss', 'content': 0.13675521314144135, 'timestamp': '2025-09-30 22:37:19.301447', 'step': 20397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.337163', 'step': 20397, 'epoch': 3} {'type': 'loss', 'content': 0.03879235312342644, 'timestamp': '2025-09-30 22:37:19.341237', 'step': 20398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.372161', 'step': 20398, 'epoch': 3} {'type': 'loss', 'content': 0.08877221494913101, 'timestamp': '2025-09-30 22:37:19.377021', 'step': 20399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.408869', 'step': 20399, 'epoch': 3} {'type': 'loss', 'content': 0.032157160341739655, 'timestamp': '2025-09-30 22:37:19.433984', 'step': 20400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.475607', 'step': 20400, 'epoch': 3} {'type': 'loss', 'content': 0.09024758636951447, 'timestamp': '2025-09-30 22:37:19.480597', 'step': 20401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:19.512776', 'step': 20401, 'epoch': 3} {'type': 'loss', 'content': 0.02373751439154148, 'timestamp': '2025-09-30 22:37:19.515098', 'step': 20402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:19.545294', 'step': 20402, 'epoch': 3} {'type': 'loss', 'content': 0.06309835612773895, 'timestamp': '2025-09-30 22:37:19.551842', 'step': 20403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.587368', 'step': 20403, 'epoch': 3} {'type': 'loss', 'content': 0.05437082424759865, 'timestamp': '2025-09-30 22:37:19.612843', 'step': 20404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:19.645259', 'step': 20404, 'epoch': 3} {'type': 'loss', 'content': 0.07690809667110443, 'timestamp': '2025-09-30 22:37:19.648399', 'step': 20405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.680474', 'step': 20405, 'epoch': 3} {'type': 'loss', 'content': 0.08665826916694641, 'timestamp': '2025-09-30 22:37:19.685710', 'step': 20406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.718409', 'step': 20406, 'epoch': 3} {'type': 'loss', 'content': 0.02562660723924637, 'timestamp': '2025-09-30 22:37:19.721402', 'step': 20407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.762685', 'step': 20407, 'epoch': 3} {'type': 'loss', 'content': 0.06359926611185074, 'timestamp': '2025-09-30 22:37:19.787199', 'step': 20408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.818023', 'step': 20408, 'epoch': 3} {'type': 'loss', 'content': 0.04376386106014252, 'timestamp': '2025-09-30 22:37:19.844677', 'step': 20409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:19.875425', 'step': 20409, 'epoch': 3} {'type': 'loss', 'content': 0.052512478083372116, 'timestamp': '2025-09-30 22:37:19.880799', 'step': 20410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:19.915767', 'step': 20410, 'epoch': 3} {'type': 'loss', 'content': 0.02191394753754139, 'timestamp': '2025-09-30 22:37:19.918254', 'step': 20411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:19.948563', 'step': 20411, 'epoch': 3} {'type': 'loss', 'content': 0.05771873518824577, 'timestamp': '2025-09-30 22:37:19.981007', 'step': 20412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.016532', 'step': 20412, 'epoch': 3} {'type': 'loss', 'content': 0.026575153693556786, 'timestamp': '2025-09-30 22:37:20.020512', 'step': 20413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:20.053702', 'step': 20413, 'epoch': 3} {'type': 'loss', 'content': 0.04278649762272835, 'timestamp': '2025-09-30 22:37:20.059927', 'step': 20414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.092000', 'step': 20414, 'epoch': 3} {'type': 'loss', 'content': 0.07869312167167664, 'timestamp': '2025-09-30 22:37:20.095151', 'step': 20415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.126704', 'step': 20415, 'epoch': 3} {'type': 'loss', 'content': 0.10185268521308899, 'timestamp': '2025-09-30 22:37:20.155249', 'step': 20416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.188976', 'step': 20416, 'epoch': 3} {'type': 'loss', 'content': 0.062209561467170715, 'timestamp': '2025-09-30 22:37:20.192973', 'step': 20417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.225278', 'step': 20417, 'epoch': 3} {'type': 'loss', 'content': 0.051168520003557205, 'timestamp': '2025-09-30 22:37:20.231170', 'step': 20418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.263940', 'step': 20418, 'epoch': 3} {'type': 'loss', 'content': 0.06677339226007462, 'timestamp': '2025-09-30 22:37:20.266912', 'step': 20419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:20.297688', 'step': 20419, 'epoch': 3} {'type': 'loss', 'content': 0.08942397683858871, 'timestamp': '2025-09-30 22:37:20.322576', 'step': 20420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.353871', 'step': 20420, 'epoch': 3} {'type': 'loss', 'content': 0.07204464823007584, 'timestamp': '2025-09-30 22:37:20.368293', 'step': 20421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.404156', 'step': 20421, 'epoch': 3} {'type': 'loss', 'content': 0.0352548211812973, 'timestamp': '2025-09-30 22:37:20.409429', 'step': 20422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.441743', 'step': 20422, 'epoch': 3} {'type': 'loss', 'content': 0.0018349115271121264, 'timestamp': '2025-09-30 22:37:20.445480', 'step': 20423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.487677', 'step': 20423, 'epoch': 3} {'type': 'loss', 'content': 0.08265934139490128, 'timestamp': '2025-09-30 22:37:20.512950', 'step': 20424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:20.544079', 'step': 20424, 'epoch': 3} {'type': 'loss', 'content': 0.06665769219398499, 'timestamp': '2025-09-30 22:37:20.546520', 'step': 20425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:20.576623', 'step': 20425, 'epoch': 3} {'type': 'loss', 'content': 0.09713566303253174, 'timestamp': '2025-09-30 22:37:20.585822', 'step': 20426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:20.624491', 'step': 20426, 'epoch': 3} {'type': 'loss', 'content': 0.09992945939302444, 'timestamp': '2025-09-30 22:37:20.629682', 'step': 20427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.662663', 'step': 20427, 'epoch': 3} {'type': 'loss', 'content': 0.0776020810008049, 'timestamp': '2025-09-30 22:37:20.687877', 'step': 20428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.719984', 'step': 20428, 'epoch': 3} {'type': 'loss', 'content': 0.04859282821416855, 'timestamp': '2025-09-30 22:37:20.724974', 'step': 20429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.757190', 'step': 20429, 'epoch': 3} {'type': 'loss', 'content': 0.056367434561252594, 'timestamp': '2025-09-30 22:37:20.762628', 'step': 20430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:20.794297', 'step': 20430, 'epoch': 3} {'type': 'loss', 'content': 0.0891701877117157, 'timestamp': '2025-09-30 22:37:20.797835', 'step': 20431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:20.830904', 'step': 20431, 'epoch': 3} {'type': 'loss', 'content': 0.016808846965432167, 'timestamp': '2025-09-30 22:37:20.858159', 'step': 20432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:20.890988', 'step': 20432, 'epoch': 3} {'type': 'loss', 'content': 0.03096011094748974, 'timestamp': '2025-09-30 22:37:20.900004', 'step': 20433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:20.954016', 'step': 20433, 'epoch': 3} {'type': 'loss', 'content': 0.02947992831468582, 'timestamp': '2025-09-30 22:37:20.956495', 'step': 20434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:21.046445', 'step': 20434, 'epoch': 3} {'type': 'loss', 'content': 0.05355115234851837, 'timestamp': '2025-09-30 22:37:21.056324', 'step': 20435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:21.127799', 'step': 20435, 'epoch': 3} {'type': 'loss', 'content': 0.048886481672525406, 'timestamp': '2025-09-30 22:37:21.165904', 'step': 20436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:21.208491', 'step': 20436, 'epoch': 3} {'type': 'loss', 'content': 0.07874138653278351, 'timestamp': '2025-09-30 22:37:21.212768', 'step': 20437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:21.290987', 'step': 20437, 'epoch': 3} {'type': 'loss', 'content': 0.09214048832654953, 'timestamp': '2025-09-30 22:37:21.295787', 'step': 20438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:21.356234', 'step': 20438, 'epoch': 3} {'type': 'loss', 'content': 0.07146593183279037, 'timestamp': '2025-09-30 22:37:21.358866', 'step': 20439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:21.411274', 'step': 20439, 'epoch': 3} {'type': 'loss', 'content': 0.02623998373746872, 'timestamp': '2025-09-30 22:37:21.444910', 'step': 20440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:21.495863', 'step': 20440, 'epoch': 3} {'type': 'loss', 'content': 0.07166168838739395, 'timestamp': '2025-09-30 22:37:21.499766', 'step': 20441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:21.572885', 'step': 20441, 'epoch': 3} {'type': 'loss', 'content': 0.03412008285522461, 'timestamp': '2025-09-30 22:37:21.577533', 'step': 20442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:21.643991', 'step': 20442, 'epoch': 3} {'type': 'loss', 'content': 0.0716402679681778, 'timestamp': '2025-09-30 22:37:21.648338', 'step': 20443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:21.693480', 'step': 20443, 'epoch': 3} {'type': 'loss', 'content': 0.04297541081905365, 'timestamp': '2025-09-30 22:37:21.721433', 'step': 20444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:21.767263', 'step': 20444, 'epoch': 3} {'type': 'loss', 'content': 0.021060975268483162, 'timestamp': '2025-09-30 22:37:21.780402', 'step': 20445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:37:21.832599', 'step': 20445, 'epoch': 3} {'type': 'loss', 'content': 0.09559670090675354, 'timestamp': '2025-09-30 22:37:21.839545', 'step': 20446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:21.905900', 'step': 20446, 'epoch': 3} {'type': 'loss', 'content': 0.05681753158569336, 'timestamp': '2025-09-30 22:37:21.920825', 'step': 20447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:21.972499', 'step': 20447, 'epoch': 3} {'type': 'loss', 'content': 0.040668588131666183, 'timestamp': '2025-09-30 22:37:21.997309', 'step': 20448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.048788', 'step': 20448, 'epoch': 3} {'type': 'loss', 'content': 0.03975053131580353, 'timestamp': '2025-09-30 22:37:22.051876', 'step': 20449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.106598', 'step': 20449, 'epoch': 3} {'type': 'loss', 'content': 0.02661014162003994, 'timestamp': '2025-09-30 22:37:22.110694', 'step': 20450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.146685', 'step': 20450, 'epoch': 3} {'type': 'loss', 'content': 0.055556464940309525, 'timestamp': '2025-09-30 22:37:22.150839', 'step': 20451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.204608', 'step': 20451, 'epoch': 3} {'type': 'loss', 'content': 0.1252647340297699, 'timestamp': '2025-09-30 22:37:22.230386', 'step': 20452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.312293', 'step': 20452, 'epoch': 3} {'type': 'loss', 'content': 0.028128445148468018, 'timestamp': '2025-09-30 22:37:22.321708', 'step': 20453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.361327', 'step': 20453, 'epoch': 3} {'type': 'loss', 'content': 0.0915704071521759, 'timestamp': '2025-09-30 22:37:22.366361', 'step': 20454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:37:22.399878', 'step': 20454, 'epoch': 3} {'type': 'loss', 'content': 0.11173053830862045, 'timestamp': '2025-09-30 22:37:22.404397', 'step': 20455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.435716', 'step': 20455, 'epoch': 3} {'type': 'loss', 'content': 0.08141428232192993, 'timestamp': '2025-09-30 22:37:22.468220', 'step': 20456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.499489', 'step': 20456, 'epoch': 3} {'type': 'loss', 'content': 0.0478568859398365, 'timestamp': '2025-09-30 22:37:22.504671', 'step': 20457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.538301', 'step': 20457, 'epoch': 3} {'type': 'loss', 'content': 0.08832928538322449, 'timestamp': '2025-09-30 22:37:22.543422', 'step': 20458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:22.577569', 'step': 20458, 'epoch': 3} {'type': 'loss', 'content': 0.1033574640750885, 'timestamp': '2025-09-30 22:37:22.583120', 'step': 20459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:22.614235', 'step': 20459, 'epoch': 3} {'type': 'loss', 'content': 0.05916809290647507, 'timestamp': '2025-09-30 22:37:22.650257', 'step': 20460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:22.682817', 'step': 20460, 'epoch': 3} {'type': 'loss', 'content': 0.030636541545391083, 'timestamp': '2025-09-30 22:37:22.686437', 'step': 20461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.718727', 'step': 20461, 'epoch': 3} {'type': 'loss', 'content': 0.06070689857006073, 'timestamp': '2025-09-30 22:37:22.725393', 'step': 20462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:22.757833', 'step': 20462, 'epoch': 3} {'type': 'loss', 'content': 0.06612887233495712, 'timestamp': '2025-09-30 22:37:22.769353', 'step': 20463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.801459', 'step': 20463, 'epoch': 3} {'type': 'loss', 'content': 0.029635760933160782, 'timestamp': '2025-09-30 22:37:22.833600', 'step': 20464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:22.866855', 'step': 20464, 'epoch': 3} {'type': 'loss', 'content': 0.03226041793823242, 'timestamp': '2025-09-30 22:37:22.871755', 'step': 20465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.904063', 'step': 20465, 'epoch': 3} {'type': 'loss', 'content': 0.05440310388803482, 'timestamp': '2025-09-30 22:37:22.915396', 'step': 20466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:22.948740', 'step': 20466, 'epoch': 3} {'type': 'loss', 'content': 0.02257630228996277, 'timestamp': '2025-09-30 22:37:22.953338', 'step': 20467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:22.987352', 'step': 20467, 'epoch': 3} {'type': 'loss', 'content': 0.06630664318799973, 'timestamp': '2025-09-30 22:37:23.013204', 'step': 20468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:23.051344', 'step': 20468, 'epoch': 3} {'type': 'loss', 'content': 0.0773978978395462, 'timestamp': '2025-09-30 22:37:23.055051', 'step': 20469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.087293', 'step': 20469, 'epoch': 3} {'type': 'loss', 'content': 0.053448569029569626, 'timestamp': '2025-09-30 22:37:23.092621', 'step': 20470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.125669', 'step': 20470, 'epoch': 3} {'type': 'loss', 'content': 0.12385197728872299, 'timestamp': '2025-09-30 22:37:23.130992', 'step': 20471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:23.165120', 'step': 20471, 'epoch': 3} {'type': 'loss', 'content': 0.13184107840061188, 'timestamp': '2025-09-30 22:37:23.195496', 'step': 20472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.238104', 'step': 20472, 'epoch': 3} {'type': 'loss', 'content': 0.10178811848163605, 'timestamp': '2025-09-30 22:37:23.248709', 'step': 20473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:23.283845', 'step': 20473, 'epoch': 3} {'type': 'loss', 'content': 0.047953683882951736, 'timestamp': '2025-09-30 22:37:23.287775', 'step': 20474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:23.320529', 'step': 20474, 'epoch': 3} {'type': 'loss', 'content': 0.0641619861125946, 'timestamp': '2025-09-30 22:37:23.325933', 'step': 20475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:23.357136', 'step': 20475, 'epoch': 3} {'type': 'loss', 'content': 0.029330318793654442, 'timestamp': '2025-09-30 22:37:23.388028', 'step': 20476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.421282', 'step': 20476, 'epoch': 3} {'type': 'loss', 'content': 0.0566532276570797, 'timestamp': '2025-09-30 22:37:23.424715', 'step': 20477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:23.459015', 'step': 20477, 'epoch': 3} {'type': 'loss', 'content': 0.07280533760786057, 'timestamp': '2025-09-30 22:37:23.462607', 'step': 20478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:23.493838', 'step': 20478, 'epoch': 3} {'type': 'loss', 'content': 0.10090899467468262, 'timestamp': '2025-09-30 22:37:23.499686', 'step': 20479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.532627', 'step': 20479, 'epoch': 3} {'type': 'loss', 'content': 0.0583336278796196, 'timestamp': '2025-09-30 22:37:23.567656', 'step': 20480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:23.606794', 'step': 20480, 'epoch': 3} {'type': 'loss', 'content': 0.05915573611855507, 'timestamp': '2025-09-30 22:37:23.610858', 'step': 20481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:23.653517', 'step': 20481, 'epoch': 3} {'type': 'loss', 'content': 0.01515493169426918, 'timestamp': '2025-09-30 22:37:23.656826', 'step': 20482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:23.689192', 'step': 20482, 'epoch': 3} {'type': 'loss', 'content': 0.07992332428693771, 'timestamp': '2025-09-30 22:37:23.693718', 'step': 20483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:37:23.725192', 'step': 20483, 'epoch': 3} {'type': 'loss', 'content': 0.06675252318382263, 'timestamp': '2025-09-30 22:37:23.763573', 'step': 20484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:23.799280', 'step': 20484, 'epoch': 3} {'type': 'loss', 'content': 0.07605867832899094, 'timestamp': '2025-09-30 22:37:23.802115', 'step': 20485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.833325', 'step': 20485, 'epoch': 3} {'type': 'loss', 'content': 0.18755707144737244, 'timestamp': '2025-09-30 22:37:23.844761', 'step': 20486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.883264', 'step': 20486, 'epoch': 3} {'type': 'loss', 'content': 0.03635797277092934, 'timestamp': '2025-09-30 22:37:23.887111', 'step': 20487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:23.919057', 'step': 20487, 'epoch': 3} {'type': 'loss', 'content': 0.04554859548807144, 'timestamp': '2025-09-30 22:37:23.951855', 'step': 20488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:23.983341', 'step': 20488, 'epoch': 3} {'type': 'loss', 'content': 0.06459040939807892, 'timestamp': '2025-09-30 22:37:23.986594', 'step': 20489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:24.035795', 'step': 20489, 'epoch': 3} {'type': 'loss', 'content': 0.06228223443031311, 'timestamp': '2025-09-30 22:37:24.043276', 'step': 20490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:24.090717', 'step': 20490, 'epoch': 3} {'type': 'loss', 'content': 0.049997713416814804, 'timestamp': '2025-09-30 22:37:24.096009', 'step': 20491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:24.128858', 'step': 20491, 'epoch': 3} {'type': 'loss', 'content': 0.04305695742368698, 'timestamp': '2025-09-30 22:37:24.163754', 'step': 20492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:24.197819', 'step': 20492, 'epoch': 3} {'type': 'loss', 'content': 0.005279123783111572, 'timestamp': '2025-09-30 22:37:24.202239', 'step': 20493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:24.257360', 'step': 20493, 'epoch': 3} {'type': 'loss', 'content': 0.05208197608590126, 'timestamp': '2025-09-30 22:37:24.263436', 'step': 20494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:24.295326', 'step': 20494, 'epoch': 3} {'type': 'loss', 'content': 0.03201611712574959, 'timestamp': '2025-09-30 22:37:24.299778', 'step': 20495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:24.332939', 'step': 20495, 'epoch': 3} {'type': 'loss', 'content': 0.01189094502478838, 'timestamp': '2025-09-30 22:37:24.371384', 'step': 20496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:24.407506', 'step': 20496, 'epoch': 3} {'type': 'loss', 'content': 0.17979241907596588, 'timestamp': '2025-09-30 22:37:24.410954', 'step': 20497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:24.441594', 'step': 20497, 'epoch': 3} {'type': 'loss', 'content': 0.02440665289759636, 'timestamp': '2025-09-30 22:37:24.450344', 'step': 20498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:24.487084', 'step': 20498, 'epoch': 3} {'type': 'loss', 'content': 0.07775947451591492, 'timestamp': '2025-09-30 22:37:24.490129', 'step': 20499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:24.521265', 'step': 20499, 'epoch': 3} {'type': 'loss', 'content': 0.09577298164367676, 'timestamp': '2025-09-30 22:37:24.549032', 'step': 20500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 20500', 'timestamp': '2025-09-30 22:37:29.586949', 'step': 20500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:29.635271', 'step': 20500, 'epoch': 3} {'type': 'loss', 'content': 0.019926829263567924, 'timestamp': '2025-09-30 22:37:29.642810', 'step': 20501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:29.676725', 'step': 20501, 'epoch': 3} {'type': 'loss', 'content': 0.056413281708955765, 'timestamp': '2025-09-30 22:37:29.682561', 'step': 20502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:29.730288', 'step': 20502, 'epoch': 3} {'type': 'loss', 'content': 0.04975308105349541, 'timestamp': '2025-09-30 22:37:29.735272', 'step': 20503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:29.767705', 'step': 20503, 'epoch': 3} {'type': 'loss', 'content': 0.05220364034175873, 'timestamp': '2025-09-30 22:37:29.793747', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:37:37.797000', 'step': 20504, 'epoch': 3} {'type': 'pplx', 'content': 9428.561041273679, 'timestamp': '2025-09-30 22:37:37.802017', 'step': 20504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:37.841714', 'step': 20504, 'epoch': 3} {'type': 'loss', 'content': 0.10754168778657913, 'timestamp': '2025-09-30 22:37:37.851365', 'step': 20505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:37.903236', 'step': 20505, 'epoch': 3} {'type': 'loss', 'content': 0.01105616707354784, 'timestamp': '2025-09-30 22:37:37.906743', 'step': 20506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:37.942075', 'step': 20506, 'epoch': 3} {'type': 'loss', 'content': 0.03774872049689293, 'timestamp': '2025-09-30 22:37:37.952520', 'step': 20507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:37.991830', 'step': 20507, 'epoch': 3} {'type': 'loss', 'content': 0.03627204895019531, 'timestamp': '2025-09-30 22:37:38.017140', 'step': 20508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.053489', 'step': 20508, 'epoch': 3} {'type': 'loss', 'content': 0.07954058796167374, 'timestamp': '2025-09-30 22:37:38.060550', 'step': 20509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:38.097882', 'step': 20509, 'epoch': 3} {'type': 'loss', 'content': 0.0271577350795269, 'timestamp': '2025-09-30 22:37:38.102255', 'step': 20510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.135977', 'step': 20510, 'epoch': 3} {'type': 'loss', 'content': 0.06282658874988556, 'timestamp': '2025-09-30 22:37:38.148021', 'step': 20511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:38.184421', 'step': 20511, 'epoch': 3} {'type': 'loss', 'content': 0.03540667146444321, 'timestamp': '2025-09-30 22:37:38.214323', 'step': 20512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:38.249500', 'step': 20512, 'epoch': 3} {'type': 'loss', 'content': 0.06781274825334549, 'timestamp': '2025-09-30 22:37:38.258137', 'step': 20513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.302106', 'step': 20513, 'epoch': 3} {'type': 'loss', 'content': 0.08107493072748184, 'timestamp': '2025-09-30 22:37:38.312554', 'step': 20514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:38.354030', 'step': 20514, 'epoch': 3} {'type': 'loss', 'content': 0.06305103003978729, 'timestamp': '2025-09-30 22:37:38.357456', 'step': 20515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:38.410896', 'step': 20515, 'epoch': 3} {'type': 'loss', 'content': 0.07010983675718307, 'timestamp': '2025-09-30 22:37:38.439589', 'step': 20516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.476551', 'step': 20516, 'epoch': 3} {'type': 'loss', 'content': 0.0929664894938469, 'timestamp': '2025-09-30 22:37:38.480351', 'step': 20517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.522412', 'step': 20517, 'epoch': 3} {'type': 'loss', 'content': 0.03718867152929306, 'timestamp': '2025-09-30 22:37:38.527142', 'step': 20518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:38.562602', 'step': 20518, 'epoch': 3} {'type': 'loss', 'content': 0.11352356523275375, 'timestamp': '2025-09-30 22:37:38.566256', 'step': 20519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:38.611817', 'step': 20519, 'epoch': 3} {'type': 'loss', 'content': 0.08093958348035812, 'timestamp': '2025-09-30 22:37:38.637314', 'step': 20520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.674764', 'step': 20520, 'epoch': 3} {'type': 'loss', 'content': 0.09594757854938507, 'timestamp': '2025-09-30 22:37:38.678831', 'step': 20521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:38.713092', 'step': 20521, 'epoch': 3} {'type': 'loss', 'content': 0.07738886028528214, 'timestamp': '2025-09-30 22:37:38.718848', 'step': 20522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.752204', 'step': 20522, 'epoch': 3} {'type': 'loss', 'content': 0.07660845667123795, 'timestamp': '2025-09-30 22:37:38.756025', 'step': 20523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.799738', 'step': 20523, 'epoch': 3} {'type': 'loss', 'content': 0.036495257169008255, 'timestamp': '2025-09-30 22:37:38.825424', 'step': 20524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:38.860750', 'step': 20524, 'epoch': 3} {'type': 'loss', 'content': 0.10568580031394958, 'timestamp': '2025-09-30 22:37:38.864643', 'step': 20525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:38.898936', 'step': 20525, 'epoch': 3} {'type': 'loss', 'content': 0.03860557824373245, 'timestamp': '2025-09-30 22:37:38.904092', 'step': 20526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:38.940515', 'step': 20526, 'epoch': 3} {'type': 'loss', 'content': 0.09018071740865707, 'timestamp': '2025-09-30 22:37:38.942942', 'step': 20527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:38.976508', 'step': 20527, 'epoch': 3} {'type': 'loss', 'content': 0.09415645152330399, 'timestamp': '2025-09-30 22:37:39.002795', 'step': 20528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.038278', 'step': 20528, 'epoch': 3} {'type': 'loss', 'content': 0.058945197612047195, 'timestamp': '2025-09-30 22:37:39.042787', 'step': 20529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:39.092663', 'step': 20529, 'epoch': 3} {'type': 'loss', 'content': 0.03528807312250137, 'timestamp': '2025-09-30 22:37:39.097964', 'step': 20530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.133366', 'step': 20530, 'epoch': 3} {'type': 'loss', 'content': 0.05384906753897667, 'timestamp': '2025-09-30 22:37:39.137320', 'step': 20531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:39.171736', 'step': 20531, 'epoch': 3} {'type': 'loss', 'content': 0.035852573812007904, 'timestamp': '2025-09-30 22:37:39.197651', 'step': 20532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:39.233963', 'step': 20532, 'epoch': 3} {'type': 'loss', 'content': 0.07307079434394836, 'timestamp': '2025-09-30 22:37:39.238405', 'step': 20533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.284084', 'step': 20533, 'epoch': 3} {'type': 'loss', 'content': 0.030701307579874992, 'timestamp': '2025-09-30 22:37:39.287247', 'step': 20534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.321996', 'step': 20534, 'epoch': 3} {'type': 'loss', 'content': 0.055825475603342056, 'timestamp': '2025-09-30 22:37:39.327210', 'step': 20535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:39.376895', 'step': 20535, 'epoch': 3} {'type': 'loss', 'content': 0.03621361032128334, 'timestamp': '2025-09-30 22:37:39.402797', 'step': 20536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.438788', 'step': 20536, 'epoch': 3} {'type': 'loss', 'content': 0.06913810968399048, 'timestamp': '2025-09-30 22:37:39.443028', 'step': 20537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.476613', 'step': 20537, 'epoch': 3} {'type': 'loss', 'content': 0.09560742974281311, 'timestamp': '2025-09-30 22:37:39.483877', 'step': 20538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:39.532431', 'step': 20538, 'epoch': 3} {'type': 'loss', 'content': 0.03713877126574516, 'timestamp': '2025-09-30 22:37:39.537978', 'step': 20539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.584259', 'step': 20539, 'epoch': 3} {'type': 'loss', 'content': 0.09108515083789825, 'timestamp': '2025-09-30 22:37:39.610322', 'step': 20540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:39.646444', 'step': 20540, 'epoch': 3} {'type': 'loss', 'content': 0.11392363905906677, 'timestamp': '2025-09-30 22:37:39.658087', 'step': 20541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:39.694348', 'step': 20541, 'epoch': 3} {'type': 'loss', 'content': 0.034396376460790634, 'timestamp': '2025-09-30 22:37:39.707772', 'step': 20542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:39.744384', 'step': 20542, 'epoch': 3} {'type': 'loss', 'content': 0.11191871762275696, 'timestamp': '2025-09-30 22:37:39.758633', 'step': 20543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:39.799045', 'step': 20543, 'epoch': 3} {'type': 'loss', 'content': 0.08925198763608932, 'timestamp': '2025-09-30 22:37:39.830108', 'step': 20544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.870440', 'step': 20544, 'epoch': 3} {'type': 'loss', 'content': 0.09664773941040039, 'timestamp': '2025-09-30 22:37:39.885217', 'step': 20545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.928606', 'step': 20545, 'epoch': 3} {'type': 'loss', 'content': 0.13552191853523254, 'timestamp': '2025-09-30 22:37:39.932578', 'step': 20546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:39.969347', 'step': 20546, 'epoch': 3} {'type': 'loss', 'content': 0.12713581323623657, 'timestamp': '2025-09-30 22:37:39.974185', 'step': 20547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:40.011223', 'step': 20547, 'epoch': 3} {'type': 'loss', 'content': 0.10191652178764343, 'timestamp': '2025-09-30 22:37:40.041287', 'step': 20548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:40.076329', 'step': 20548, 'epoch': 3} {'type': 'loss', 'content': 0.056989505887031555, 'timestamp': '2025-09-30 22:37:40.080677', 'step': 20549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:40.117890', 'step': 20549, 'epoch': 3} {'type': 'loss', 'content': 0.06819425523281097, 'timestamp': '2025-09-30 22:37:40.129147', 'step': 20550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.164035', 'step': 20550, 'epoch': 3} {'type': 'loss', 'content': 0.13000938296318054, 'timestamp': '2025-09-30 22:37:40.167460', 'step': 20551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.207345', 'step': 20551, 'epoch': 3} {'type': 'loss', 'content': 0.06540053337812424, 'timestamp': '2025-09-30 22:37:40.246800', 'step': 20552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.295601', 'step': 20552, 'epoch': 3} {'type': 'loss', 'content': 0.09371010214090347, 'timestamp': '2025-09-30 22:37:40.301080', 'step': 20553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:40.345761', 'step': 20553, 'epoch': 3} {'type': 'loss', 'content': 0.059311192482709885, 'timestamp': '2025-09-30 22:37:40.351483', 'step': 20554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.396452', 'step': 20554, 'epoch': 3} {'type': 'loss', 'content': 0.1433294266462326, 'timestamp': '2025-09-30 22:37:40.406764', 'step': 20555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:40.440383', 'step': 20555, 'epoch': 3} {'type': 'loss', 'content': 0.07884355634450912, 'timestamp': '2025-09-30 22:37:40.465872', 'step': 20556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:40.514117', 'step': 20556, 'epoch': 3} {'type': 'loss', 'content': 0.049717310816049576, 'timestamp': '2025-09-30 22:37:40.531680', 'step': 20557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:40.571864', 'step': 20557, 'epoch': 3} {'type': 'loss', 'content': 0.050825200974941254, 'timestamp': '2025-09-30 22:37:40.576264', 'step': 20558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.613005', 'step': 20558, 'epoch': 3} {'type': 'loss', 'content': 0.10572601109743118, 'timestamp': '2025-09-30 22:37:40.618202', 'step': 20559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:40.667309', 'step': 20559, 'epoch': 3} {'type': 'loss', 'content': 0.04356217011809349, 'timestamp': '2025-09-30 22:37:40.701118', 'step': 20560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.750519', 'step': 20560, 'epoch': 3} {'type': 'loss', 'content': 0.06370463967323303, 'timestamp': '2025-09-30 22:37:40.765963', 'step': 20561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:40.808660', 'step': 20561, 'epoch': 3} {'type': 'loss', 'content': 0.17742197215557098, 'timestamp': '2025-09-30 22:37:40.821774', 'step': 20562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:40.865382', 'step': 20562, 'epoch': 3} {'type': 'loss', 'content': 0.06937292963266373, 'timestamp': '2025-09-30 22:37:40.868885', 'step': 20563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.921340', 'step': 20563, 'epoch': 3} {'type': 'loss', 'content': 0.13263244926929474, 'timestamp': '2025-09-30 22:37:40.950670', 'step': 20564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:40.987920', 'step': 20564, 'epoch': 3} {'type': 'loss', 'content': 0.08416005969047546, 'timestamp': '2025-09-30 22:37:40.999633', 'step': 20565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:41.054878', 'step': 20565, 'epoch': 3} {'type': 'loss', 'content': 0.08688398450613022, 'timestamp': '2025-09-30 22:37:41.061316', 'step': 20566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:41.118696', 'step': 20566, 'epoch': 3} {'type': 'loss', 'content': 0.038596104830503464, 'timestamp': '2025-09-30 22:37:41.123256', 'step': 20567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:41.187101', 'step': 20567, 'epoch': 3} {'type': 'loss', 'content': 0.08906727284193039, 'timestamp': '2025-09-30 22:37:41.213220', 'step': 20568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:41.259859', 'step': 20568, 'epoch': 3} {'type': 'loss', 'content': 0.0708347037434578, 'timestamp': '2025-09-30 22:37:41.267843', 'step': 20569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:41.317699', 'step': 20569, 'epoch': 3} {'type': 'loss', 'content': 0.0600946880877018, 'timestamp': '2025-09-30 22:37:41.322634', 'step': 20570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:41.364655', 'step': 20570, 'epoch': 3} {'type': 'loss', 'content': 0.04537900164723396, 'timestamp': '2025-09-30 22:37:41.373257', 'step': 20571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:41.422492', 'step': 20571, 'epoch': 3} {'type': 'loss', 'content': 0.01031789556145668, 'timestamp': '2025-09-30 22:37:41.450440', 'step': 20572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:41.497374', 'step': 20572, 'epoch': 3} {'type': 'loss', 'content': 0.04090246930718422, 'timestamp': '2025-09-30 22:37:41.514693', 'step': 20573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:41.562503', 'step': 20573, 'epoch': 3} {'type': 'loss', 'content': 0.11320190131664276, 'timestamp': '2025-09-30 22:37:41.567359', 'step': 20574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:41.617053', 'step': 20574, 'epoch': 3} {'type': 'loss', 'content': 0.1017770767211914, 'timestamp': '2025-09-30 22:37:41.620808', 'step': 20575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:41.654911', 'step': 20575, 'epoch': 3} {'type': 'loss', 'content': 0.12503282725811005, 'timestamp': '2025-09-30 22:37:41.680695', 'step': 20576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:41.728082', 'step': 20576, 'epoch': 3} {'type': 'loss', 'content': 0.05739044398069382, 'timestamp': '2025-09-30 22:37:41.739845', 'step': 20577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:41.787561', 'step': 20577, 'epoch': 3} {'type': 'loss', 'content': 0.13257727026939392, 'timestamp': '2025-09-30 22:37:41.810991', 'step': 20578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:41.847385', 'step': 20578, 'epoch': 3} {'type': 'loss', 'content': 0.04119202494621277, 'timestamp': '2025-09-30 22:37:41.861429', 'step': 20579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:41.905584', 'step': 20579, 'epoch': 3} {'type': 'loss', 'content': 0.07621996849775314, 'timestamp': '2025-09-30 22:37:41.944672', 'step': 20580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:41.980564', 'step': 20580, 'epoch': 3} {'type': 'loss', 'content': 0.06362080574035645, 'timestamp': '2025-09-30 22:37:41.991636', 'step': 20581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:42.028460', 'step': 20581, 'epoch': 3} {'type': 'loss', 'content': 0.06177584454417229, 'timestamp': '2025-09-30 22:37:42.037132', 'step': 20582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:42.073328', 'step': 20582, 'epoch': 3} {'type': 'loss', 'content': 0.042701900005340576, 'timestamp': '2025-09-30 22:37:42.078376', 'step': 20583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:42.124130', 'step': 20583, 'epoch': 3} {'type': 'loss', 'content': 0.05248991400003433, 'timestamp': '2025-09-30 22:37:42.159246', 'step': 20584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:42.213729', 'step': 20584, 'epoch': 3} {'type': 'loss', 'content': 0.023288432508707047, 'timestamp': '2025-09-30 22:37:42.229159', 'step': 20585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:42.273908', 'step': 20585, 'epoch': 3} {'type': 'loss', 'content': 0.008511778898537159, 'timestamp': '2025-09-30 22:37:42.288751', 'step': 20586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:42.332990', 'step': 20586, 'epoch': 3} {'type': 'loss', 'content': 0.05491719767451286, 'timestamp': '2025-09-30 22:37:42.340857', 'step': 20587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:42.374702', 'step': 20587, 'epoch': 3} {'type': 'loss', 'content': 0.09880698472261429, 'timestamp': '2025-09-30 22:37:42.410361', 'step': 20588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:37:42.453219', 'step': 20588, 'epoch': 3} {'type': 'loss', 'content': 0.02853931486606598, 'timestamp': '2025-09-30 22:37:42.459603', 'step': 20589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:42.508384', 'step': 20589, 'epoch': 3} {'type': 'loss', 'content': 0.05246608331799507, 'timestamp': '2025-09-30 22:37:42.526762', 'step': 20590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:42.565742', 'step': 20590, 'epoch': 3} {'type': 'loss', 'content': 0.07208666950464249, 'timestamp': '2025-09-30 22:37:42.571841', 'step': 20591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:42.625862', 'step': 20591, 'epoch': 3} {'type': 'loss', 'content': 0.08862683922052383, 'timestamp': '2025-09-30 22:37:42.651835', 'step': 20592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:42.686002', 'step': 20592, 'epoch': 3} {'type': 'loss', 'content': 0.19140437245368958, 'timestamp': '2025-09-30 22:37:42.699680', 'step': 20593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:42.734554', 'step': 20593, 'epoch': 3} {'type': 'loss', 'content': 0.046173613518476486, 'timestamp': '2025-09-30 22:37:42.744570', 'step': 20594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:42.780464', 'step': 20594, 'epoch': 3} {'type': 'loss', 'content': 0.01555273775011301, 'timestamp': '2025-09-30 22:37:42.794077', 'step': 20595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:42.829550', 'step': 20595, 'epoch': 3} {'type': 'loss', 'content': 0.08174766600131989, 'timestamp': '2025-09-30 22:37:42.869318', 'step': 20596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:42.910391', 'step': 20596, 'epoch': 3} {'type': 'loss', 'content': 0.04442685469985008, 'timestamp': '2025-09-30 22:37:42.923573', 'step': 20597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:42.963385', 'step': 20597, 'epoch': 3} {'type': 'loss', 'content': 0.041445259004831314, 'timestamp': '2025-09-30 22:37:42.966787', 'step': 20598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:43.002209', 'step': 20598, 'epoch': 3} {'type': 'loss', 'content': 0.03993523493409157, 'timestamp': '2025-09-30 22:37:43.014915', 'step': 20599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:43.061283', 'step': 20599, 'epoch': 3} {'type': 'loss', 'content': 0.05296735838055611, 'timestamp': '2025-09-30 22:37:43.099449', 'step': 20600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:43.137710', 'step': 20600, 'epoch': 3} {'type': 'loss', 'content': 0.06292930245399475, 'timestamp': '2025-09-30 22:37:43.143124', 'step': 20601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:43.179608', 'step': 20601, 'epoch': 3} {'type': 'loss', 'content': 0.08924813568592072, 'timestamp': '2025-09-30 22:37:43.190162', 'step': 20602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:43.227028', 'step': 20602, 'epoch': 3} {'type': 'loss', 'content': 0.05662277340888977, 'timestamp': '2025-09-30 22:37:43.241183', 'step': 20603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:43.278282', 'step': 20603, 'epoch': 3} {'type': 'loss', 'content': 0.13355736434459686, 'timestamp': '2025-09-30 22:37:43.304065', 'step': 20604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:43.344698', 'step': 20604, 'epoch': 3} {'type': 'loss', 'content': 0.05187679082155228, 'timestamp': '2025-09-30 22:37:43.350379', 'step': 20605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:43.393785', 'step': 20605, 'epoch': 3} {'type': 'loss', 'content': 0.08637674897909164, 'timestamp': '2025-09-30 22:37:43.397239', 'step': 20606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:43.440018', 'step': 20606, 'epoch': 3} {'type': 'loss', 'content': 0.09126938134431839, 'timestamp': '2025-09-30 22:37:43.452657', 'step': 20607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:43.498425', 'step': 20607, 'epoch': 3} {'type': 'loss', 'content': 0.05440393090248108, 'timestamp': '2025-09-30 22:37:43.523448', 'step': 20608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:43.563641', 'step': 20608, 'epoch': 3} {'type': 'loss', 'content': 0.10261164605617523, 'timestamp': '2025-09-30 22:37:43.567698', 'step': 20609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:43.604385', 'step': 20609, 'epoch': 3} {'type': 'loss', 'content': 0.11034265905618668, 'timestamp': '2025-09-30 22:37:43.610754', 'step': 20610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:43.649645', 'step': 20610, 'epoch': 3} {'type': 'loss', 'content': 0.07717232406139374, 'timestamp': '2025-09-30 22:37:43.654142', 'step': 20611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:43.689896', 'step': 20611, 'epoch': 3} {'type': 'loss', 'content': 0.050780557096004486, 'timestamp': '2025-09-30 22:37:43.716429', 'step': 20612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:43.764220', 'step': 20612, 'epoch': 3} {'type': 'loss', 'content': 0.029870329424738884, 'timestamp': '2025-09-30 22:37:43.773991', 'step': 20613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:43.820361', 'step': 20613, 'epoch': 3} {'type': 'loss', 'content': 0.08894708007574081, 'timestamp': '2025-09-30 22:37:43.825762', 'step': 20614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:43.869070', 'step': 20614, 'epoch': 3} {'type': 'loss', 'content': 0.07369264960289001, 'timestamp': '2025-09-30 22:37:43.881078', 'step': 20615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:43.919124', 'step': 20615, 'epoch': 3} {'type': 'loss', 'content': 0.09132921695709229, 'timestamp': '2025-09-30 22:37:43.955611', 'step': 20616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:44.002513', 'step': 20616, 'epoch': 3} {'type': 'loss', 'content': 0.04025038331747055, 'timestamp': '2025-09-30 22:37:44.005702', 'step': 20617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:44.039356', 'step': 20617, 'epoch': 3} {'type': 'loss', 'content': 0.07905855774879456, 'timestamp': '2025-09-30 22:37:44.053823', 'step': 20618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:44.092560', 'step': 20618, 'epoch': 3} {'type': 'loss', 'content': 0.05584372952580452, 'timestamp': '2025-09-30 22:37:44.104829', 'step': 20619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:44.154188', 'step': 20619, 'epoch': 3} {'type': 'loss', 'content': 0.034071456640958786, 'timestamp': '2025-09-30 22:37:44.193302', 'step': 20620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:44.228462', 'step': 20620, 'epoch': 3} {'type': 'loss', 'content': 0.07392679899930954, 'timestamp': '2025-09-30 22:37:44.236361', 'step': 20621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:44.275210', 'step': 20621, 'epoch': 3} {'type': 'loss', 'content': 0.03278316557407379, 'timestamp': '2025-09-30 22:37:44.284831', 'step': 20622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:44.319406', 'step': 20622, 'epoch': 3} {'type': 'loss', 'content': 0.09520974010229111, 'timestamp': '2025-09-30 22:37:44.324149', 'step': 20623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:44.359320', 'step': 20623, 'epoch': 3} {'type': 'loss', 'content': 0.04717659950256348, 'timestamp': '2025-09-30 22:37:44.384647', 'step': 20624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:44.421655', 'step': 20624, 'epoch': 3} {'type': 'loss', 'content': 0.0699039027094841, 'timestamp': '2025-09-30 22:37:44.426244', 'step': 20625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:44.471035', 'step': 20625, 'epoch': 3} {'type': 'loss', 'content': 0.03671645000576973, 'timestamp': '2025-09-30 22:37:44.475696', 'step': 20626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:44.509022', 'step': 20626, 'epoch': 3} {'type': 'loss', 'content': 0.07958677411079407, 'timestamp': '2025-09-30 22:37:44.513162', 'step': 20627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:44.545983', 'step': 20627, 'epoch': 3} {'type': 'loss', 'content': 0.04627275839447975, 'timestamp': '2025-09-30 22:37:44.570137', 'step': 20628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:44.607545', 'step': 20628, 'epoch': 3} {'type': 'loss', 'content': 0.0646335706114769, 'timestamp': '2025-09-30 22:37:44.610590', 'step': 20629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:44.645958', 'step': 20629, 'epoch': 3} {'type': 'loss', 'content': 0.07415637373924255, 'timestamp': '2025-09-30 22:37:44.653311', 'step': 20630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:44.694166', 'step': 20630, 'epoch': 3} {'type': 'loss', 'content': 0.09011749178171158, 'timestamp': '2025-09-30 22:37:44.703424', 'step': 20631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:44.748789', 'step': 20631, 'epoch': 3} {'type': 'loss', 'content': 0.02505647763609886, 'timestamp': '2025-09-30 22:37:44.775008', 'step': 20632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:44.810944', 'step': 20632, 'epoch': 3} {'type': 'loss', 'content': 0.044878650456666946, 'timestamp': '2025-09-30 22:37:44.814799', 'step': 20633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:44.852847', 'step': 20633, 'epoch': 3} {'type': 'loss', 'content': 0.06520096212625504, 'timestamp': '2025-09-30 22:37:44.857070', 'step': 20634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:44.893751', 'step': 20634, 'epoch': 3} {'type': 'loss', 'content': 0.05078229680657387, 'timestamp': '2025-09-30 22:37:44.897312', 'step': 20635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:44.950082', 'step': 20635, 'epoch': 3} {'type': 'loss', 'content': 0.027750520035624504, 'timestamp': '2025-09-30 22:37:44.975424', 'step': 20636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:45.020704', 'step': 20636, 'epoch': 3} {'type': 'loss', 'content': 0.07132779061794281, 'timestamp': '2025-09-30 22:37:45.033629', 'step': 20637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:45.085084', 'step': 20637, 'epoch': 3} {'type': 'loss', 'content': 0.008041569031774998, 'timestamp': '2025-09-30 22:37:45.102019', 'step': 20638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:45.148435', 'step': 20638, 'epoch': 3} {'type': 'loss', 'content': 0.05429697781801224, 'timestamp': '2025-09-30 22:37:45.165788', 'step': 20639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:45.215458', 'step': 20639, 'epoch': 3} {'type': 'loss', 'content': 0.10360705107450485, 'timestamp': '2025-09-30 22:37:45.244522', 'step': 20640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:45.295586', 'step': 20640, 'epoch': 3} {'type': 'loss', 'content': 0.02431265078485012, 'timestamp': '2025-09-30 22:37:45.301257', 'step': 20641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:45.352318', 'step': 20641, 'epoch': 3} {'type': 'loss', 'content': 0.10679671168327332, 'timestamp': '2025-09-30 22:37:45.358444', 'step': 20642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:45.396701', 'step': 20642, 'epoch': 3} {'type': 'loss', 'content': 0.08091498911380768, 'timestamp': '2025-09-30 22:37:45.400805', 'step': 20643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:45.437224', 'step': 20643, 'epoch': 3} {'type': 'loss', 'content': 0.007123191840946674, 'timestamp': '2025-09-30 22:37:45.463624', 'step': 20644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:45.501604', 'step': 20644, 'epoch': 3} {'type': 'loss', 'content': 0.032044410705566406, 'timestamp': '2025-09-30 22:37:45.510451', 'step': 20645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:45.551134', 'step': 20645, 'epoch': 3} {'type': 'loss', 'content': 0.08980734646320343, 'timestamp': '2025-09-30 22:37:45.560232', 'step': 20646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:45.619123', 'step': 20646, 'epoch': 3} {'type': 'loss', 'content': 0.03439228609204292, 'timestamp': '2025-09-30 22:37:45.622454', 'step': 20647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:45.680266', 'step': 20647, 'epoch': 3} {'type': 'loss', 'content': 0.04139302298426628, 'timestamp': '2025-09-30 22:37:45.705300', 'step': 20648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:45.752782', 'step': 20648, 'epoch': 3} {'type': 'loss', 'content': 0.05496570095419884, 'timestamp': '2025-09-30 22:37:45.759152', 'step': 20649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:45.799127', 'step': 20649, 'epoch': 3} {'type': 'loss', 'content': 0.09126996994018555, 'timestamp': '2025-09-30 22:37:45.803639', 'step': 20650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:45.843594', 'step': 20650, 'epoch': 3} {'type': 'loss', 'content': 0.03226403892040253, 'timestamp': '2025-09-30 22:37:45.847660', 'step': 20651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:45.910376', 'step': 20651, 'epoch': 3} {'type': 'loss', 'content': 0.06163310259580612, 'timestamp': '2025-09-30 22:37:45.935554', 'step': 20652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:45.976662', 'step': 20652, 'epoch': 3} {'type': 'loss', 'content': 0.06867276132106781, 'timestamp': '2025-09-30 22:37:45.982384', 'step': 20653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:46.025088', 'step': 20653, 'epoch': 3} {'type': 'loss', 'content': 0.042647380381822586, 'timestamp': '2025-09-30 22:37:46.030636', 'step': 20654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:46.076689', 'step': 20654, 'epoch': 3} {'type': 'loss', 'content': 0.05254337564110756, 'timestamp': '2025-09-30 22:37:46.083710', 'step': 20655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:46.129494', 'step': 20655, 'epoch': 3} {'type': 'loss', 'content': 0.13708552718162537, 'timestamp': '2025-09-30 22:37:46.154952', 'step': 20656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:46.199296', 'step': 20656, 'epoch': 3} {'type': 'loss', 'content': 0.047072891145944595, 'timestamp': '2025-09-30 22:37:46.203136', 'step': 20657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:46.249601', 'step': 20657, 'epoch': 3} {'type': 'loss', 'content': 0.07335969060659409, 'timestamp': '2025-09-30 22:37:46.266151', 'step': 20658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.299569', 'step': 20658, 'epoch': 3} {'type': 'loss', 'content': 0.07927645742893219, 'timestamp': '2025-09-30 22:37:46.304131', 'step': 20659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:46.350068', 'step': 20659, 'epoch': 3} {'type': 'loss', 'content': 0.07033602893352509, 'timestamp': '2025-09-30 22:37:46.374548', 'step': 20660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.431803', 'step': 20660, 'epoch': 3} {'type': 'loss', 'content': 0.03561653941869736, 'timestamp': '2025-09-30 22:37:46.445262', 'step': 20661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:46.493229', 'step': 20661, 'epoch': 3} {'type': 'loss', 'content': 0.039210643619298935, 'timestamp': '2025-09-30 22:37:46.502088', 'step': 20662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.542392', 'step': 20662, 'epoch': 3} {'type': 'loss', 'content': 0.02957926131784916, 'timestamp': '2025-09-30 22:37:46.546816', 'step': 20663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:46.596639', 'step': 20663, 'epoch': 3} {'type': 'loss', 'content': 0.04871816188097, 'timestamp': '2025-09-30 22:37:46.621388', 'step': 20664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.668333', 'step': 20664, 'epoch': 3} {'type': 'loss', 'content': 0.0967131033539772, 'timestamp': '2025-09-30 22:37:46.671718', 'step': 20665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.711162', 'step': 20665, 'epoch': 3} {'type': 'loss', 'content': 0.09870816767215729, 'timestamp': '2025-09-30 22:37:46.716716', 'step': 20666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.762927', 'step': 20666, 'epoch': 3} {'type': 'loss', 'content': 0.04926511272788048, 'timestamp': '2025-09-30 22:37:46.769184', 'step': 20667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:46.817395', 'step': 20667, 'epoch': 3} {'type': 'loss', 'content': 0.055976491421461105, 'timestamp': '2025-09-30 22:37:46.857256', 'step': 20668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.902925', 'step': 20668, 'epoch': 3} {'type': 'loss', 'content': 0.09424129128456116, 'timestamp': '2025-09-30 22:37:46.907737', 'step': 20669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:46.948032', 'step': 20669, 'epoch': 3} {'type': 'loss', 'content': 0.10731151700019836, 'timestamp': '2025-09-30 22:37:46.956868', 'step': 20670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.002498', 'step': 20670, 'epoch': 3} {'type': 'loss', 'content': 0.10919427871704102, 'timestamp': '2025-09-30 22:37:47.006417', 'step': 20671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.046075', 'step': 20671, 'epoch': 3} {'type': 'loss', 'content': 0.060535311698913574, 'timestamp': '2025-09-30 22:37:47.070958', 'step': 20672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:47.107094', 'step': 20672, 'epoch': 3} {'type': 'loss', 'content': 0.03889863193035126, 'timestamp': '2025-09-30 22:37:47.110555', 'step': 20673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:47.145864', 'step': 20673, 'epoch': 3} {'type': 'loss', 'content': 0.04052143916487694, 'timestamp': '2025-09-30 22:37:47.159371', 'step': 20674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:47.195222', 'step': 20674, 'epoch': 3} {'type': 'loss', 'content': 0.0804930031299591, 'timestamp': '2025-09-30 22:37:47.198247', 'step': 20675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.233662', 'step': 20675, 'epoch': 3} {'type': 'loss', 'content': 0.04047505930066109, 'timestamp': '2025-09-30 22:37:47.258408', 'step': 20676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:47.292084', 'step': 20676, 'epoch': 3} {'type': 'loss', 'content': 0.09451020509004593, 'timestamp': '2025-09-30 22:37:47.296369', 'step': 20677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:47.336144', 'step': 20677, 'epoch': 3} {'type': 'loss', 'content': 0.11452306807041168, 'timestamp': '2025-09-30 22:37:47.340416', 'step': 20678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:47.375778', 'step': 20678, 'epoch': 3} {'type': 'loss', 'content': 0.06776684522628784, 'timestamp': '2025-09-30 22:37:47.378251', 'step': 20679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.412766', 'step': 20679, 'epoch': 3} {'type': 'loss', 'content': 0.044456541538238525, 'timestamp': '2025-09-30 22:37:47.437934', 'step': 20680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.475223', 'step': 20680, 'epoch': 3} {'type': 'loss', 'content': 0.08545947074890137, 'timestamp': '2025-09-30 22:37:47.484821', 'step': 20681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:47.525839', 'step': 20681, 'epoch': 3} {'type': 'loss', 'content': 0.057211458683013916, 'timestamp': '2025-09-30 22:37:47.530566', 'step': 20682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.567600', 'step': 20682, 'epoch': 3} {'type': 'loss', 'content': 0.02285841293632984, 'timestamp': '2025-09-30 22:37:47.572061', 'step': 20683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.606095', 'step': 20683, 'epoch': 3} {'type': 'loss', 'content': 0.07233984023332596, 'timestamp': '2025-09-30 22:37:47.632753', 'step': 20684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:47.667480', 'step': 20684, 'epoch': 3} {'type': 'loss', 'content': 0.08534451574087143, 'timestamp': '2025-09-30 22:37:47.670176', 'step': 20685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:47.704858', 'step': 20685, 'epoch': 3} {'type': 'loss', 'content': 0.10851995646953583, 'timestamp': '2025-09-30 22:37:47.717189', 'step': 20686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:47.764035', 'step': 20686, 'epoch': 3} {'type': 'loss', 'content': 0.12024172395467758, 'timestamp': '2025-09-30 22:37:47.771043', 'step': 20687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:47.810342', 'step': 20687, 'epoch': 3} {'type': 'loss', 'content': 0.07161647081375122, 'timestamp': '2025-09-30 22:37:47.837530', 'step': 20688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:47.872665', 'step': 20688, 'epoch': 3} {'type': 'loss', 'content': 0.053331173956394196, 'timestamp': '2025-09-30 22:37:47.886311', 'step': 20689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:47.924988', 'step': 20689, 'epoch': 3} {'type': 'loss', 'content': 0.018784688785672188, 'timestamp': '2025-09-30 22:37:47.928274', 'step': 20690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:47.967729', 'step': 20690, 'epoch': 3} {'type': 'loss', 'content': 0.13653632998466492, 'timestamp': '2025-09-30 22:37:47.971497', 'step': 20691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:48.018346', 'step': 20691, 'epoch': 3} {'type': 'loss', 'content': 0.1680326610803604, 'timestamp': '2025-09-30 22:37:48.045474', 'step': 20692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.085249', 'step': 20692, 'epoch': 3} {'type': 'loss', 'content': 0.048196639865636826, 'timestamp': '2025-09-30 22:37:48.090346', 'step': 20693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:48.130501', 'step': 20693, 'epoch': 3} {'type': 'loss', 'content': 0.009566779248416424, 'timestamp': '2025-09-30 22:37:48.148727', 'step': 20694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:48.197329', 'step': 20694, 'epoch': 3} {'type': 'loss', 'content': 0.04724278673529625, 'timestamp': '2025-09-30 22:37:48.207350', 'step': 20695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:48.246624', 'step': 20695, 'epoch': 3} {'type': 'loss', 'content': 0.052740614861249924, 'timestamp': '2025-09-30 22:37:48.281038', 'step': 20696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:48.338571', 'step': 20696, 'epoch': 3} {'type': 'loss', 'content': 0.05451694130897522, 'timestamp': '2025-09-30 22:37:48.341617', 'step': 20697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:48.383335', 'step': 20697, 'epoch': 3} {'type': 'loss', 'content': 0.03980686515569687, 'timestamp': '2025-09-30 22:37:48.391699', 'step': 20698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:48.442207', 'step': 20698, 'epoch': 3} {'type': 'loss', 'content': 0.09441697597503662, 'timestamp': '2025-09-30 22:37:48.446146', 'step': 20699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:48.480940', 'step': 20699, 'epoch': 3} {'type': 'loss', 'content': 0.0707605630159378, 'timestamp': '2025-09-30 22:37:48.506219', 'step': 20700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:48.541928', 'step': 20700, 'epoch': 3} {'type': 'loss', 'content': 0.09323615580797195, 'timestamp': '2025-09-30 22:37:48.545954', 'step': 20701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.587930', 'step': 20701, 'epoch': 3} {'type': 'loss', 'content': 0.05641677975654602, 'timestamp': '2025-09-30 22:37:48.590972', 'step': 20702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.630409', 'step': 20702, 'epoch': 3} {'type': 'loss', 'content': 0.08974490314722061, 'timestamp': '2025-09-30 22:37:48.634145', 'step': 20703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:48.679996', 'step': 20703, 'epoch': 3} {'type': 'loss', 'content': 0.032144684344530106, 'timestamp': '2025-09-30 22:37:48.705088', 'step': 20704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:48.750498', 'step': 20704, 'epoch': 3} {'type': 'loss', 'content': 0.12641330063343048, 'timestamp': '2025-09-30 22:37:48.753633', 'step': 20705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:48.794621', 'step': 20705, 'epoch': 3} {'type': 'loss', 'content': 0.0866062343120575, 'timestamp': '2025-09-30 22:37:48.804172', 'step': 20706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.843604', 'step': 20706, 'epoch': 3} {'type': 'loss', 'content': 0.06389855593442917, 'timestamp': '2025-09-30 22:37:48.846545', 'step': 20707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.880512', 'step': 20707, 'epoch': 3} {'type': 'loss', 'content': 0.02246704138815403, 'timestamp': '2025-09-30 22:37:48.907200', 'step': 20708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.943669', 'step': 20708, 'epoch': 3} {'type': 'loss', 'content': 0.03808265179395676, 'timestamp': '2025-09-30 22:37:48.948863', 'step': 20709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:48.982278', 'step': 20709, 'epoch': 3} {'type': 'loss', 'content': 0.11885453760623932, 'timestamp': '2025-09-30 22:37:48.985357', 'step': 20710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:49.023165', 'step': 20710, 'epoch': 3} {'type': 'loss', 'content': 0.06298807263374329, 'timestamp': '2025-09-30 22:37:49.027813', 'step': 20711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:49.062553', 'step': 20711, 'epoch': 3} {'type': 'loss', 'content': 0.038793642073869705, 'timestamp': '2025-09-30 22:37:49.093935', 'step': 20712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:49.129599', 'step': 20712, 'epoch': 3} {'type': 'loss', 'content': 0.048728134483098984, 'timestamp': '2025-09-30 22:37:49.139816', 'step': 20713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:49.175225', 'step': 20713, 'epoch': 3} {'type': 'loss', 'content': 0.045331064611673355, 'timestamp': '2025-09-30 22:37:49.182269', 'step': 20714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.225099', 'step': 20714, 'epoch': 3} {'type': 'loss', 'content': 0.06485587358474731, 'timestamp': '2025-09-30 22:37:49.228157', 'step': 20715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:49.261891', 'step': 20715, 'epoch': 3} {'type': 'loss', 'content': 0.07677049189805984, 'timestamp': '2025-09-30 22:37:49.288386', 'step': 20716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.327716', 'step': 20716, 'epoch': 3} {'type': 'loss', 'content': 0.05059684440493584, 'timestamp': '2025-09-30 22:37:49.330949', 'step': 20717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:49.367814', 'step': 20717, 'epoch': 3} {'type': 'loss', 'content': 0.004807213321328163, 'timestamp': '2025-09-30 22:37:49.370869', 'step': 20718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.411060', 'step': 20718, 'epoch': 3} {'type': 'loss', 'content': 0.02539699524641037, 'timestamp': '2025-09-30 22:37:49.419915', 'step': 20719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:49.454119', 'step': 20719, 'epoch': 3} {'type': 'loss', 'content': 0.05641363933682442, 'timestamp': '2025-09-30 22:37:49.478962', 'step': 20720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.519548', 'step': 20720, 'epoch': 3} {'type': 'loss', 'content': 0.09072966128587723, 'timestamp': '2025-09-30 22:37:49.522416', 'step': 20721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:49.556845', 'step': 20721, 'epoch': 3} {'type': 'loss', 'content': 0.09278381615877151, 'timestamp': '2025-09-30 22:37:49.561763', 'step': 20722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:49.610960', 'step': 20722, 'epoch': 3} {'type': 'loss', 'content': 0.0028942530043423176, 'timestamp': '2025-09-30 22:37:49.615494', 'step': 20723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.649815', 'step': 20723, 'epoch': 3} {'type': 'loss', 'content': 0.03824133425951004, 'timestamp': '2025-09-30 22:37:49.680865', 'step': 20724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.715717', 'step': 20724, 'epoch': 3} {'type': 'loss', 'content': 0.04640991985797882, 'timestamp': '2025-09-30 22:37:49.722802', 'step': 20725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:49.765077', 'step': 20725, 'epoch': 3} {'type': 'loss', 'content': 0.05321744084358215, 'timestamp': '2025-09-30 22:37:49.773802', 'step': 20726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:49.809945', 'step': 20726, 'epoch': 3} {'type': 'loss', 'content': 0.06918610632419586, 'timestamp': '2025-09-30 22:37:49.822908', 'step': 20727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:49.857327', 'step': 20727, 'epoch': 3} {'type': 'loss', 'content': 0.14585433900356293, 'timestamp': '2025-09-30 22:37:49.886861', 'step': 20728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:49.931231', 'step': 20728, 'epoch': 3} {'type': 'loss', 'content': 0.024618756026029587, 'timestamp': '2025-09-30 22:37:49.935245', 'step': 20729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:49.979105', 'step': 20729, 'epoch': 3} {'type': 'loss', 'content': 0.01828533411026001, 'timestamp': '2025-09-30 22:37:49.984406', 'step': 20730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.021881', 'step': 20730, 'epoch': 3} {'type': 'loss', 'content': 0.026350880041718483, 'timestamp': '2025-09-30 22:37:50.027772', 'step': 20731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:50.065993', 'step': 20731, 'epoch': 3} {'type': 'loss', 'content': 0.025316676124930382, 'timestamp': '2025-09-30 22:37:50.093107', 'step': 20732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:50.127539', 'step': 20732, 'epoch': 3} {'type': 'loss', 'content': 0.055301323533058167, 'timestamp': '2025-09-30 22:37:50.135357', 'step': 20733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.183394', 'step': 20733, 'epoch': 3} {'type': 'loss', 'content': 0.0830826684832573, 'timestamp': '2025-09-30 22:37:50.194421', 'step': 20734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:50.230906', 'step': 20734, 'epoch': 3} {'type': 'loss', 'content': 0.1009349673986435, 'timestamp': '2025-09-30 22:37:50.239368', 'step': 20735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.286933', 'step': 20735, 'epoch': 3} {'type': 'loss', 'content': 0.0478392019867897, 'timestamp': '2025-09-30 22:37:50.313313', 'step': 20736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:50.349047', 'step': 20736, 'epoch': 3} {'type': 'loss', 'content': 0.05080033093690872, 'timestamp': '2025-09-30 22:37:50.353043', 'step': 20737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.389116', 'step': 20737, 'epoch': 3} {'type': 'loss', 'content': 0.09231897443532944, 'timestamp': '2025-09-30 22:37:50.393900', 'step': 20738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.440052', 'step': 20738, 'epoch': 3} {'type': 'loss', 'content': 0.007457050960510969, 'timestamp': '2025-09-30 22:37:50.442895', 'step': 20739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.479338', 'step': 20739, 'epoch': 3} {'type': 'loss', 'content': 0.1233007088303566, 'timestamp': '2025-09-30 22:37:50.505557', 'step': 20740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:50.543256', 'step': 20740, 'epoch': 3} {'type': 'loss', 'content': 0.04336334392428398, 'timestamp': '2025-09-30 22:37:50.547520', 'step': 20741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:50.583121', 'step': 20741, 'epoch': 3} {'type': 'loss', 'content': 0.10804414749145508, 'timestamp': '2025-09-30 22:37:50.586811', 'step': 20742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:50.622136', 'step': 20742, 'epoch': 3} {'type': 'loss', 'content': 0.0701567530632019, 'timestamp': '2025-09-30 22:37:50.627546', 'step': 20743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:50.666216', 'step': 20743, 'epoch': 3} {'type': 'loss', 'content': 0.057217083871364594, 'timestamp': '2025-09-30 22:37:50.692129', 'step': 20744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.727681', 'step': 20744, 'epoch': 3} {'type': 'loss', 'content': 0.09186667203903198, 'timestamp': '2025-09-30 22:37:50.738765', 'step': 20745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:50.773357', 'step': 20745, 'epoch': 3} {'type': 'loss', 'content': 0.09864583611488342, 'timestamp': '2025-09-30 22:37:50.779076', 'step': 20746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:50.814489', 'step': 20746, 'epoch': 3} {'type': 'loss', 'content': 0.06318281590938568, 'timestamp': '2025-09-30 22:37:50.817405', 'step': 20747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:50.856817', 'step': 20747, 'epoch': 3} {'type': 'loss', 'content': 0.019620850682258606, 'timestamp': '2025-09-30 22:37:50.882717', 'step': 20748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:50.918186', 'step': 20748, 'epoch': 3} {'type': 'loss', 'content': 0.032394446432590485, 'timestamp': '2025-09-30 22:37:50.929778', 'step': 20749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:50.972918', 'step': 20749, 'epoch': 3} {'type': 'loss', 'content': 0.07271857559680939, 'timestamp': '2025-09-30 22:37:50.978042', 'step': 20750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.013507', 'step': 20750, 'epoch': 3} {'type': 'loss', 'content': 0.12452949583530426, 'timestamp': '2025-09-30 22:37:51.018940', 'step': 20751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:51.055799', 'step': 20751, 'epoch': 3} {'type': 'loss', 'content': 0.03448111563920975, 'timestamp': '2025-09-30 22:37:51.081188', 'step': 20752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:37:51.123432', 'step': 20752, 'epoch': 3} {'type': 'loss', 'content': 0.10429101437330246, 'timestamp': '2025-09-30 22:37:51.127250', 'step': 20753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.173553', 'step': 20753, 'epoch': 3} {'type': 'loss', 'content': 0.07904897630214691, 'timestamp': '2025-09-30 22:37:51.176350', 'step': 20754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.218335', 'step': 20754, 'epoch': 3} {'type': 'loss', 'content': 0.04972098022699356, 'timestamp': '2025-09-30 22:37:51.228448', 'step': 20755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:51.263619', 'step': 20755, 'epoch': 3} {'type': 'loss', 'content': 0.08902599662542343, 'timestamp': '2025-09-30 22:37:51.288694', 'step': 20756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.321907', 'step': 20756, 'epoch': 3} {'type': 'loss', 'content': 0.04848200082778931, 'timestamp': '2025-09-30 22:37:51.327774', 'step': 20757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.363761', 'step': 20757, 'epoch': 3} {'type': 'loss', 'content': 0.038262538611888885, 'timestamp': '2025-09-30 22:37:51.367875', 'step': 20758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.402241', 'step': 20758, 'epoch': 3} {'type': 'loss', 'content': 0.05187361687421799, 'timestamp': '2025-09-30 22:37:51.406364', 'step': 20759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:51.447394', 'step': 20759, 'epoch': 3} {'type': 'loss', 'content': 0.03808889165520668, 'timestamp': '2025-09-30 22:37:51.481261', 'step': 20760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:51.528793', 'step': 20760, 'epoch': 3} {'type': 'loss', 'content': 0.040971048176288605, 'timestamp': '2025-09-30 22:37:51.531312', 'step': 20761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.565435', 'step': 20761, 'epoch': 3} {'type': 'loss', 'content': 0.1318318396806717, 'timestamp': '2025-09-30 22:37:51.574274', 'step': 20762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:51.615848', 'step': 20762, 'epoch': 3} {'type': 'loss', 'content': 0.05090143904089928, 'timestamp': '2025-09-30 22:37:51.619110', 'step': 20763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.674820', 'step': 20763, 'epoch': 3} {'type': 'loss', 'content': 0.06454788893461227, 'timestamp': '2025-09-30 22:37:51.699399', 'step': 20764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:51.734306', 'step': 20764, 'epoch': 3} {'type': 'loss', 'content': 0.10393428057432175, 'timestamp': '2025-09-30 22:37:51.743761', 'step': 20765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:51.778237', 'step': 20765, 'epoch': 3} {'type': 'loss', 'content': 0.047074005007743835, 'timestamp': '2025-09-30 22:37:51.780863', 'step': 20766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:51.815937', 'step': 20766, 'epoch': 3} {'type': 'loss', 'content': 0.10618767142295837, 'timestamp': '2025-09-30 22:37:51.820517', 'step': 20767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:51.854591', 'step': 20767, 'epoch': 3} {'type': 'loss', 'content': 0.03224333003163338, 'timestamp': '2025-09-30 22:37:51.881381', 'step': 20768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:51.917404', 'step': 20768, 'epoch': 3} {'type': 'loss', 'content': 0.0653311014175415, 'timestamp': '2025-09-30 22:37:51.922731', 'step': 20769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:51.973670', 'step': 20769, 'epoch': 3} {'type': 'loss', 'content': 0.11685869097709656, 'timestamp': '2025-09-30 22:37:51.976759', 'step': 20770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:52.010938', 'step': 20770, 'epoch': 3} {'type': 'loss', 'content': 0.036799415946006775, 'timestamp': '2025-09-30 22:37:52.014878', 'step': 20771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:52.050509', 'step': 20771, 'epoch': 3} {'type': 'loss', 'content': 0.06388638913631439, 'timestamp': '2025-09-30 22:37:52.080824', 'step': 20772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:52.116018', 'step': 20772, 'epoch': 3} {'type': 'loss', 'content': 0.0534047894179821, 'timestamp': '2025-09-30 22:37:52.128475', 'step': 20773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:52.176254', 'step': 20773, 'epoch': 3} {'type': 'loss', 'content': 0.06893271952867508, 'timestamp': '2025-09-30 22:37:52.181493', 'step': 20774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:52.219427', 'step': 20774, 'epoch': 3} {'type': 'loss', 'content': 0.0989239290356636, 'timestamp': '2025-09-30 22:37:52.223809', 'step': 20775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:52.258555', 'step': 20775, 'epoch': 3} {'type': 'loss', 'content': 0.019602755084633827, 'timestamp': '2025-09-30 22:37:52.284490', 'step': 20776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:52.328726', 'step': 20776, 'epoch': 3} {'type': 'loss', 'content': 0.05749004706740379, 'timestamp': '2025-09-30 22:37:52.334622', 'step': 20777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:52.383573', 'step': 20777, 'epoch': 3} {'type': 'loss', 'content': 0.06862801313400269, 'timestamp': '2025-09-30 22:37:52.386640', 'step': 20778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:52.421455', 'step': 20778, 'epoch': 3} {'type': 'loss', 'content': 0.08366705477237701, 'timestamp': '2025-09-30 22:37:52.424914', 'step': 20779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:52.460800', 'step': 20779, 'epoch': 3} {'type': 'loss', 'content': 0.10848032683134079, 'timestamp': '2025-09-30 22:37:52.493006', 'step': 20780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:52.528685', 'step': 20780, 'epoch': 3} {'type': 'loss', 'content': 0.07799684256315231, 'timestamp': '2025-09-30 22:37:52.532180', 'step': 20781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:52.575800', 'step': 20781, 'epoch': 3} {'type': 'loss', 'content': 0.06514468789100647, 'timestamp': '2025-09-30 22:37:52.578838', 'step': 20782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:52.620096', 'step': 20782, 'epoch': 3} {'type': 'loss', 'content': 0.05490294098854065, 'timestamp': '2025-09-30 22:37:52.623064', 'step': 20783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:52.659871', 'step': 20783, 'epoch': 3} {'type': 'loss', 'content': 0.01661515422165394, 'timestamp': '2025-09-30 22:37:52.685542', 'step': 20784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:52.721720', 'step': 20784, 'epoch': 3} {'type': 'loss', 'content': 0.0991063341498375, 'timestamp': '2025-09-30 22:37:52.728510', 'step': 20785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:52.770573', 'step': 20785, 'epoch': 3} {'type': 'loss', 'content': 0.03943709656596184, 'timestamp': '2025-09-30 22:37:52.779705', 'step': 20786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:52.817698', 'step': 20786, 'epoch': 3} {'type': 'loss', 'content': 0.04922720044851303, 'timestamp': '2025-09-30 22:37:52.831625', 'step': 20787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:52.868226', 'step': 20787, 'epoch': 3} {'type': 'loss', 'content': 0.1058816909790039, 'timestamp': '2025-09-30 22:37:52.899112', 'step': 20788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:52.938479', 'step': 20788, 'epoch': 3} {'type': 'loss', 'content': 0.03434077277779579, 'timestamp': '2025-09-30 22:37:52.942064', 'step': 20789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:52.985036', 'step': 20789, 'epoch': 3} {'type': 'loss', 'content': 0.10875070840120316, 'timestamp': '2025-09-30 22:37:52.993461', 'step': 20790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.042224', 'step': 20790, 'epoch': 3} {'type': 'loss', 'content': 0.056946661323308945, 'timestamp': '2025-09-30 22:37:53.045739', 'step': 20791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:53.085203', 'step': 20791, 'epoch': 3} {'type': 'loss', 'content': 0.07877340167760849, 'timestamp': '2025-09-30 22:37:53.110673', 'step': 20792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.150783', 'step': 20792, 'epoch': 3} {'type': 'loss', 'content': 0.0770752876996994, 'timestamp': '2025-09-30 22:37:53.154405', 'step': 20793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:53.189893', 'step': 20793, 'epoch': 3} {'type': 'loss', 'content': 0.010573551058769226, 'timestamp': '2025-09-30 22:37:53.194330', 'step': 20794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:53.230730', 'step': 20794, 'epoch': 3} {'type': 'loss', 'content': 0.06275778263807297, 'timestamp': '2025-09-30 22:37:53.233556', 'step': 20795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.279524', 'step': 20795, 'epoch': 3} {'type': 'loss', 'content': 0.0327448733150959, 'timestamp': '2025-09-30 22:37:53.310131', 'step': 20796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.343790', 'step': 20796, 'epoch': 3} {'type': 'loss', 'content': 0.051642272621393204, 'timestamp': '2025-09-30 22:37:53.348205', 'step': 20797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:53.384603', 'step': 20797, 'epoch': 3} {'type': 'loss', 'content': 0.04406983405351639, 'timestamp': '2025-09-30 22:37:53.392298', 'step': 20798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:53.431499', 'step': 20798, 'epoch': 3} {'type': 'loss', 'content': 0.0833699181675911, 'timestamp': '2025-09-30 22:37:53.437941', 'step': 20799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:53.474457', 'step': 20799, 'epoch': 3} {'type': 'loss', 'content': 0.07613813132047653, 'timestamp': '2025-09-30 22:37:53.500305', 'step': 20800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:53.534341', 'step': 20800, 'epoch': 3} {'type': 'loss', 'content': 0.1201678216457367, 'timestamp': '2025-09-30 22:37:53.541363', 'step': 20801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:53.584413', 'step': 20801, 'epoch': 3} {'type': 'loss', 'content': 0.04073493927717209, 'timestamp': '2025-09-30 22:37:53.592019', 'step': 20802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:53.631882', 'step': 20802, 'epoch': 3} {'type': 'loss', 'content': 0.0672195702791214, 'timestamp': '2025-09-30 22:37:53.634678', 'step': 20803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:53.672607', 'step': 20803, 'epoch': 3} {'type': 'loss', 'content': 0.04573105648159981, 'timestamp': '2025-09-30 22:37:53.701929', 'step': 20804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:53.744128', 'step': 20804, 'epoch': 3} {'type': 'loss', 'content': 0.04119979590177536, 'timestamp': '2025-09-30 22:37:53.749101', 'step': 20805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.784405', 'step': 20805, 'epoch': 3} {'type': 'loss', 'content': 0.14247587323188782, 'timestamp': '2025-09-30 22:37:53.790054', 'step': 20806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.826818', 'step': 20806, 'epoch': 3} {'type': 'loss', 'content': 0.0356539711356163, 'timestamp': '2025-09-30 22:37:53.832032', 'step': 20807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:53.867201', 'step': 20807, 'epoch': 3} {'type': 'loss', 'content': 0.06722752004861832, 'timestamp': '2025-09-30 22:37:53.899647', 'step': 20808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:53.939071', 'step': 20808, 'epoch': 3} {'type': 'loss', 'content': 0.14905604720115662, 'timestamp': '2025-09-30 22:37:53.944065', 'step': 20809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:53.978999', 'step': 20809, 'epoch': 3} {'type': 'loss', 'content': 0.020574823021888733, 'timestamp': '2025-09-30 22:37:53.982779', 'step': 20810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:54.018192', 'step': 20810, 'epoch': 3} {'type': 'loss', 'content': 0.11907820403575897, 'timestamp': '2025-09-30 22:37:54.022445', 'step': 20811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.056738', 'step': 20811, 'epoch': 3} {'type': 'loss', 'content': 0.06749378889799118, 'timestamp': '2025-09-30 22:37:54.084244', 'step': 20812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.117751', 'step': 20812, 'epoch': 3} {'type': 'loss', 'content': 0.05944552645087242, 'timestamp': '2025-09-30 22:37:54.122193', 'step': 20813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.158076', 'step': 20813, 'epoch': 3} {'type': 'loss', 'content': 0.02755357325077057, 'timestamp': '2025-09-30 22:37:54.170774', 'step': 20814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.216555', 'step': 20814, 'epoch': 3} {'type': 'loss', 'content': 0.09630479663610458, 'timestamp': '2025-09-30 22:37:54.221229', 'step': 20815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:54.256574', 'step': 20815, 'epoch': 3} {'type': 'loss', 'content': 0.05233050137758255, 'timestamp': '2025-09-30 22:37:54.295097', 'step': 20816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.331698', 'step': 20816, 'epoch': 3} {'type': 'loss', 'content': 0.08084703981876373, 'timestamp': '2025-09-30 22:37:54.336070', 'step': 20817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:54.371298', 'step': 20817, 'epoch': 3} {'type': 'loss', 'content': 0.05631290748715401, 'timestamp': '2025-09-30 22:37:54.378426', 'step': 20818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:54.413017', 'step': 20818, 'epoch': 3} {'type': 'loss', 'content': 0.013894103467464447, 'timestamp': '2025-09-30 22:37:54.420898', 'step': 20819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:54.460049', 'step': 20819, 'epoch': 3} {'type': 'loss', 'content': 0.10701142996549606, 'timestamp': '2025-09-30 22:37:54.495378', 'step': 20820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:54.531229', 'step': 20820, 'epoch': 3} {'type': 'loss', 'content': 0.07436848431825638, 'timestamp': '2025-09-30 22:37:54.534792', 'step': 20821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:54.569493', 'step': 20821, 'epoch': 3} {'type': 'loss', 'content': 0.033789314329624176, 'timestamp': '2025-09-30 22:37:54.573129', 'step': 20822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:54.619458', 'step': 20822, 'epoch': 3} {'type': 'loss', 'content': 0.04066784679889679, 'timestamp': '2025-09-30 22:37:54.622650', 'step': 20823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:54.670850', 'step': 20823, 'epoch': 3} {'type': 'loss', 'content': 0.12021929025650024, 'timestamp': '2025-09-30 22:37:54.698558', 'step': 20824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.733484', 'step': 20824, 'epoch': 3} {'type': 'loss', 'content': 0.0912816971540451, 'timestamp': '2025-09-30 22:37:54.736916', 'step': 20825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:54.771699', 'step': 20825, 'epoch': 3} {'type': 'loss', 'content': 0.11267231404781342, 'timestamp': '2025-09-30 22:37:54.774419', 'step': 20826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:54.807719', 'step': 20826, 'epoch': 3} {'type': 'loss', 'content': 0.05863121524453163, 'timestamp': '2025-09-30 22:37:54.819835', 'step': 20827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:54.863941', 'step': 20827, 'epoch': 3} {'type': 'loss', 'content': 0.10227370262145996, 'timestamp': '2025-09-30 22:37:54.890739', 'step': 20828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:54.925408', 'step': 20828, 'epoch': 3} {'type': 'loss', 'content': 0.07425381243228912, 'timestamp': '2025-09-30 22:37:54.929674', 'step': 20829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:54.965542', 'step': 20829, 'epoch': 3} {'type': 'loss', 'content': 0.09921663254499435, 'timestamp': '2025-09-30 22:37:54.970840', 'step': 20830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.004046', 'step': 20830, 'epoch': 3} {'type': 'loss', 'content': 0.068105548620224, 'timestamp': '2025-09-30 22:37:55.006682', 'step': 20831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:55.039732', 'step': 20831, 'epoch': 3} {'type': 'loss', 'content': 0.06449677050113678, 'timestamp': '2025-09-30 22:37:55.064831', 'step': 20832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:55.100321', 'step': 20832, 'epoch': 3} {'type': 'loss', 'content': 0.07496543228626251, 'timestamp': '2025-09-30 22:37:55.107376', 'step': 20833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.145474', 'step': 20833, 'epoch': 3} {'type': 'loss', 'content': 0.14457793533802032, 'timestamp': '2025-09-30 22:37:55.148516', 'step': 20834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:55.183160', 'step': 20834, 'epoch': 3} {'type': 'loss', 'content': 0.101730577647686, 'timestamp': '2025-09-30 22:37:55.186357', 'step': 20835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:55.221637', 'step': 20835, 'epoch': 3} {'type': 'loss', 'content': 0.10787401348352432, 'timestamp': '2025-09-30 22:37:55.246007', 'step': 20836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:55.281399', 'step': 20836, 'epoch': 3} {'type': 'loss', 'content': 0.05358089879155159, 'timestamp': '2025-09-30 22:37:55.298600', 'step': 20837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:55.334544', 'step': 20837, 'epoch': 3} {'type': 'loss', 'content': 0.015508264303207397, 'timestamp': '2025-09-30 22:37:55.343224', 'step': 20838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.377646', 'step': 20838, 'epoch': 3} {'type': 'loss', 'content': 0.11212001740932465, 'timestamp': '2025-09-30 22:37:55.380701', 'step': 20839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.415348', 'step': 20839, 'epoch': 3} {'type': 'loss', 'content': 0.08646350353956223, 'timestamp': '2025-09-30 22:37:55.441137', 'step': 20840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.475164', 'step': 20840, 'epoch': 3} {'type': 'loss', 'content': 0.0460033155977726, 'timestamp': '2025-09-30 22:37:55.480504', 'step': 20841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:55.514837', 'step': 20841, 'epoch': 3} {'type': 'loss', 'content': 0.10574499517679214, 'timestamp': '2025-09-30 22:37:55.518246', 'step': 20842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.552505', 'step': 20842, 'epoch': 3} {'type': 'loss', 'content': 0.08413489162921906, 'timestamp': '2025-09-30 22:37:55.557700', 'step': 20843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:55.608065', 'step': 20843, 'epoch': 3} {'type': 'loss', 'content': 0.04553620517253876, 'timestamp': '2025-09-30 22:37:55.634469', 'step': 20844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:55.676184', 'step': 20844, 'epoch': 3} {'type': 'loss', 'content': 0.013566477224230766, 'timestamp': '2025-09-30 22:37:55.679536', 'step': 20845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:55.724288', 'step': 20845, 'epoch': 3} {'type': 'loss', 'content': 0.06107622757554054, 'timestamp': '2025-09-30 22:37:55.734266', 'step': 20846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.778189', 'step': 20846, 'epoch': 3} {'type': 'loss', 'content': 0.022553084418177605, 'timestamp': '2025-09-30 22:37:55.781805', 'step': 20847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.815864', 'step': 20847, 'epoch': 3} {'type': 'loss', 'content': 0.026331579312682152, 'timestamp': '2025-09-30 22:37:55.846205', 'step': 20848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.882243', 'step': 20848, 'epoch': 3} {'type': 'loss', 'content': 0.06703712046146393, 'timestamp': '2025-09-30 22:37:55.886484', 'step': 20849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.933097', 'step': 20849, 'epoch': 3} {'type': 'loss', 'content': 0.03160550445318222, 'timestamp': '2025-09-30 22:37:55.939595', 'step': 20850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:55.977636', 'step': 20850, 'epoch': 3} {'type': 'loss', 'content': 0.046619512140750885, 'timestamp': '2025-09-30 22:37:55.981788', 'step': 20851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:56.015482', 'step': 20851, 'epoch': 3} {'type': 'loss', 'content': 0.07364827394485474, 'timestamp': '2025-09-30 22:37:56.042205', 'step': 20852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:56.079744', 'step': 20852, 'epoch': 3} {'type': 'loss', 'content': 0.1448359191417694, 'timestamp': '2025-09-30 22:37:56.099828', 'step': 20853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:56.143161', 'step': 20853, 'epoch': 3} {'type': 'loss', 'content': 0.09216172993183136, 'timestamp': '2025-09-30 22:37:56.155174', 'step': 20854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:56.200851', 'step': 20854, 'epoch': 3} {'type': 'loss', 'content': 0.06829433143138885, 'timestamp': '2025-09-30 22:37:56.204854', 'step': 20855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:56.251781', 'step': 20855, 'epoch': 3} {'type': 'loss', 'content': 0.08368492871522903, 'timestamp': '2025-09-30 22:37:56.286914', 'step': 20856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:56.322620', 'step': 20856, 'epoch': 3} {'type': 'loss', 'content': 0.01345847174525261, 'timestamp': '2025-09-30 22:37:56.329319', 'step': 20857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:56.379474', 'step': 20857, 'epoch': 3} {'type': 'loss', 'content': 0.05370183289051056, 'timestamp': '2025-09-30 22:37:56.383475', 'step': 20858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:56.422130', 'step': 20858, 'epoch': 3} {'type': 'loss', 'content': 0.05762549862265587, 'timestamp': '2025-09-30 22:37:56.426354', 'step': 20859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:56.462032', 'step': 20859, 'epoch': 3} {'type': 'loss', 'content': 0.09518361836671829, 'timestamp': '2025-09-30 22:37:56.488986', 'step': 20860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:56.533998', 'step': 20860, 'epoch': 3} {'type': 'loss', 'content': 0.07162369042634964, 'timestamp': '2025-09-30 22:37:56.538164', 'step': 20861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:56.579576', 'step': 20861, 'epoch': 3} {'type': 'loss', 'content': 0.08485490083694458, 'timestamp': '2025-09-30 22:37:56.583629', 'step': 20862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:56.620086', 'step': 20862, 'epoch': 3} {'type': 'loss', 'content': 0.02109801582992077, 'timestamp': '2025-09-30 22:37:56.624434', 'step': 20863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:56.665032', 'step': 20863, 'epoch': 3} {'type': 'loss', 'content': 0.04665203392505646, 'timestamp': '2025-09-30 22:37:56.690157', 'step': 20864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:56.726334', 'step': 20864, 'epoch': 3} {'type': 'loss', 'content': 0.15200872719287872, 'timestamp': '2025-09-30 22:37:56.729827', 'step': 20865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:56.763815', 'step': 20865, 'epoch': 3} {'type': 'loss', 'content': 0.060981933027505875, 'timestamp': '2025-09-30 22:37:56.768587', 'step': 20866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:56.805047', 'step': 20866, 'epoch': 3} {'type': 'loss', 'content': 0.086260586977005, 'timestamp': '2025-09-30 22:37:56.808561', 'step': 20867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:56.845712', 'step': 20867, 'epoch': 3} {'type': 'loss', 'content': 0.09398867934942245, 'timestamp': '2025-09-30 22:37:56.876636', 'step': 20868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:56.911307', 'step': 20868, 'epoch': 3} {'type': 'loss', 'content': 0.06391984969377518, 'timestamp': '2025-09-30 22:37:56.922442', 'step': 20869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:56.957104', 'step': 20869, 'epoch': 3} {'type': 'loss', 'content': 0.07557280361652374, 'timestamp': '2025-09-30 22:37:56.960856', 'step': 20870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:56.996197', 'step': 20870, 'epoch': 3} {'type': 'loss', 'content': 0.03045780025422573, 'timestamp': '2025-09-30 22:37:56.999630', 'step': 20871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:57.034419', 'step': 20871, 'epoch': 3} {'type': 'loss', 'content': 0.10607099533081055, 'timestamp': '2025-09-30 22:37:57.058956', 'step': 20872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:57.100211', 'step': 20872, 'epoch': 3} {'type': 'loss', 'content': 0.09534267336130142, 'timestamp': '2025-09-30 22:37:57.107037', 'step': 20873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:57.149226', 'step': 20873, 'epoch': 3} {'type': 'loss', 'content': 0.05139534920454025, 'timestamp': '2025-09-30 22:37:57.153686', 'step': 20874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:57.204831', 'step': 20874, 'epoch': 3} {'type': 'loss', 'content': 0.058219995349645615, 'timestamp': '2025-09-30 22:37:57.208059', 'step': 20875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:57.256860', 'step': 20875, 'epoch': 3} {'type': 'loss', 'content': 0.036186620593070984, 'timestamp': '2025-09-30 22:37:57.282647', 'step': 20876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:57.316789', 'step': 20876, 'epoch': 3} {'type': 'loss', 'content': 0.06420796364545822, 'timestamp': '2025-09-30 22:37:57.327879', 'step': 20877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:57.365443', 'step': 20877, 'epoch': 3} {'type': 'loss', 'content': 0.046527210623025894, 'timestamp': '2025-09-30 22:37:57.379422', 'step': 20878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:57.419340', 'step': 20878, 'epoch': 3} {'type': 'loss', 'content': 0.12387216836214066, 'timestamp': '2025-09-30 22:37:57.426489', 'step': 20879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:57.460628', 'step': 20879, 'epoch': 3} {'type': 'loss', 'content': 0.051840126514434814, 'timestamp': '2025-09-30 22:37:57.485908', 'step': 20880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:57.544541', 'step': 20880, 'epoch': 3} {'type': 'loss', 'content': 0.036869268864393234, 'timestamp': '2025-09-30 22:37:57.554646', 'step': 20881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:57.592619', 'step': 20881, 'epoch': 3} {'type': 'loss', 'content': 0.04341009259223938, 'timestamp': '2025-09-30 22:37:57.604281', 'step': 20882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:57.647213', 'step': 20882, 'epoch': 3} {'type': 'loss', 'content': 0.011184746399521828, 'timestamp': '2025-09-30 22:37:57.658059', 'step': 20883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:57.707845', 'step': 20883, 'epoch': 3} {'type': 'loss', 'content': 0.02223733812570572, 'timestamp': '2025-09-30 22:37:57.735242', 'step': 20884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:57.772715', 'step': 20884, 'epoch': 3} {'type': 'loss', 'content': 0.05216995254158974, 'timestamp': '2025-09-30 22:37:57.776896', 'step': 20885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:57.813082', 'step': 20885, 'epoch': 3} {'type': 'loss', 'content': 0.04278712347149849, 'timestamp': '2025-09-30 22:37:57.817482', 'step': 20886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:57.852578', 'step': 20886, 'epoch': 3} {'type': 'loss', 'content': 0.040761373937129974, 'timestamp': '2025-09-30 22:37:57.858365', 'step': 20887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:57.904621', 'step': 20887, 'epoch': 3} {'type': 'loss', 'content': 0.10218073427677155, 'timestamp': '2025-09-30 22:37:57.931989', 'step': 20888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:57.967607', 'step': 20888, 'epoch': 3} {'type': 'loss', 'content': 0.045144885778427124, 'timestamp': '2025-09-30 22:37:57.976028', 'step': 20889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:58.011039', 'step': 20889, 'epoch': 3} {'type': 'loss', 'content': 0.09831342846155167, 'timestamp': '2025-09-30 22:37:58.013863', 'step': 20890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:37:58.062433', 'step': 20890, 'epoch': 3} {'type': 'loss', 'content': 0.021982697769999504, 'timestamp': '2025-09-30 22:37:58.064735', 'step': 20891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 208], 'flops': 6170111174080}, 'timestamp': '2025-09-30 22:37:58.110401', 'step': 20891, 'epoch': 3} {'type': 'loss', 'content': 0.03472400829195976, 'timestamp': '2025-09-30 22:37:58.136806', 'step': 20892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:58.171164', 'step': 20892, 'epoch': 3} {'type': 'loss', 'content': 0.11034341901540756, 'timestamp': '2025-09-30 22:37:58.173677', 'step': 20893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:58.206898', 'step': 20893, 'epoch': 3} {'type': 'loss', 'content': 0.05256747454404831, 'timestamp': '2025-09-30 22:37:58.216356', 'step': 20894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:58.253245', 'step': 20894, 'epoch': 3} {'type': 'loss', 'content': 0.06524194031953812, 'timestamp': '2025-09-30 22:37:58.256879', 'step': 20895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:58.307588', 'step': 20895, 'epoch': 3} {'type': 'loss', 'content': 0.0690784752368927, 'timestamp': '2025-09-30 22:37:58.333870', 'step': 20896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:58.386085', 'step': 20896, 'epoch': 3} {'type': 'loss', 'content': 0.078152596950531, 'timestamp': '2025-09-30 22:37:58.396844', 'step': 20897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:58.431749', 'step': 20897, 'epoch': 3} {'type': 'loss', 'content': 0.08988575637340546, 'timestamp': '2025-09-30 22:37:58.435151', 'step': 20898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:37:58.470063', 'step': 20898, 'epoch': 3} {'type': 'loss', 'content': 0.07506439834833145, 'timestamp': '2025-09-30 22:37:58.477152', 'step': 20899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:37:58.514032', 'step': 20899, 'epoch': 3} {'type': 'loss', 'content': 0.05994144082069397, 'timestamp': '2025-09-30 22:37:58.539323', 'step': 20900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:58.575731', 'step': 20900, 'epoch': 3} {'type': 'loss', 'content': 0.047211308032274246, 'timestamp': '2025-09-30 22:37:58.580734', 'step': 20901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:58.615698', 'step': 20901, 'epoch': 3} {'type': 'loss', 'content': 0.07047606259584427, 'timestamp': '2025-09-30 22:37:58.623592', 'step': 20902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:58.663323', 'step': 20902, 'epoch': 3} {'type': 'loss', 'content': 0.09883305430412292, 'timestamp': '2025-09-30 22:37:58.667672', 'step': 20903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:58.712801', 'step': 20903, 'epoch': 3} {'type': 'loss', 'content': 0.08486925810575485, 'timestamp': '2025-09-30 22:37:58.744996', 'step': 20904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:58.780716', 'step': 20904, 'epoch': 3} {'type': 'loss', 'content': 0.05735067278146744, 'timestamp': '2025-09-30 22:37:58.784664', 'step': 20905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:58.829734', 'step': 20905, 'epoch': 3} {'type': 'loss', 'content': 0.06996568292379379, 'timestamp': '2025-09-30 22:37:58.838540', 'step': 20906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:58.880474', 'step': 20906, 'epoch': 3} {'type': 'loss', 'content': 0.011091138236224651, 'timestamp': '2025-09-30 22:37:58.885551', 'step': 20907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:58.919844', 'step': 20907, 'epoch': 3} {'type': 'loss', 'content': 0.05080742388963699, 'timestamp': '2025-09-30 22:37:58.945325', 'step': 20908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:58.979797', 'step': 20908, 'epoch': 3} {'type': 'loss', 'content': 0.01764059253036976, 'timestamp': '2025-09-30 22:37:58.983525', 'step': 20909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.016406', 'step': 20909, 'epoch': 3} {'type': 'loss', 'content': 0.14105668663978577, 'timestamp': '2025-09-30 22:37:59.021059', 'step': 20910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.080748', 'step': 20910, 'epoch': 3} {'type': 'loss', 'content': 0.05999475717544556, 'timestamp': '2025-09-30 22:37:59.085344', 'step': 20911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.117633', 'step': 20911, 'epoch': 3} {'type': 'loss', 'content': 0.02941892482340336, 'timestamp': '2025-09-30 22:37:59.143604', 'step': 20912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:59.176282', 'step': 20912, 'epoch': 3} {'type': 'loss', 'content': 0.04889466613531113, 'timestamp': '2025-09-30 22:37:59.179941', 'step': 20913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.213462', 'step': 20913, 'epoch': 3} {'type': 'loss', 'content': 0.13264445960521698, 'timestamp': '2025-09-30 22:37:59.226649', 'step': 20914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:59.267395', 'step': 20914, 'epoch': 3} {'type': 'loss', 'content': 0.13773150742053986, 'timestamp': '2025-09-30 22:37:59.280878', 'step': 20915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:59.320702', 'step': 20915, 'epoch': 3} {'type': 'loss', 'content': 0.06881017237901688, 'timestamp': '2025-09-30 22:37:59.353740', 'step': 20916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.384163', 'step': 20916, 'epoch': 3} {'type': 'loss', 'content': 0.0897640809416771, 'timestamp': '2025-09-30 22:37:59.390905', 'step': 20917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:59.424210', 'step': 20917, 'epoch': 3} {'type': 'loss', 'content': 0.07630191743373871, 'timestamp': '2025-09-30 22:37:59.426709', 'step': 20918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:59.459084', 'step': 20918, 'epoch': 3} {'type': 'loss', 'content': 0.1344156116247177, 'timestamp': '2025-09-30 22:37:59.474359', 'step': 20919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:37:59.517633', 'step': 20919, 'epoch': 3} {'type': 'loss', 'content': 0.12284041941165924, 'timestamp': '2025-09-30 22:37:59.542315', 'step': 20920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.578932', 'step': 20920, 'epoch': 3} {'type': 'loss', 'content': 0.08026842772960663, 'timestamp': '2025-09-30 22:37:59.591608', 'step': 20921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:59.625284', 'step': 20921, 'epoch': 3} {'type': 'loss', 'content': 0.07213255763053894, 'timestamp': '2025-09-30 22:37:59.636892', 'step': 20922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.680720', 'step': 20922, 'epoch': 3} {'type': 'loss', 'content': 0.08245804905891418, 'timestamp': '2025-09-30 22:37:59.683928', 'step': 20923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.717357', 'step': 20923, 'epoch': 3} {'type': 'loss', 'content': 0.06918457895517349, 'timestamp': '2025-09-30 22:37:59.750799', 'step': 20924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.784714', 'step': 20924, 'epoch': 3} {'type': 'loss', 'content': 0.1051538735628128, 'timestamp': '2025-09-30 22:37:59.798846', 'step': 20925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:37:59.833456', 'step': 20925, 'epoch': 3} {'type': 'loss', 'content': 0.026567138731479645, 'timestamp': '2025-09-30 22:37:59.838304', 'step': 20926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:37:59.880019', 'step': 20926, 'epoch': 3} {'type': 'loss', 'content': 0.0980994701385498, 'timestamp': '2025-09-30 22:37:59.895268', 'step': 20927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:37:59.929839', 'step': 20927, 'epoch': 3} {'type': 'loss', 'content': 0.03846186771988869, 'timestamp': '2025-09-30 22:37:59.968981', 'step': 20928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.006396', 'step': 20928, 'epoch': 3} {'type': 'loss', 'content': 0.08528327941894531, 'timestamp': '2025-09-30 22:38:00.010294', 'step': 20929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.046615', 'step': 20929, 'epoch': 3} {'type': 'loss', 'content': 0.06935141980648041, 'timestamp': '2025-09-30 22:38:00.051227', 'step': 20930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.087298', 'step': 20930, 'epoch': 3} {'type': 'loss', 'content': 0.09496329724788666, 'timestamp': '2025-09-30 22:38:00.091241', 'step': 20931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:00.126391', 'step': 20931, 'epoch': 3} {'type': 'loss', 'content': 0.016671478748321533, 'timestamp': '2025-09-30 22:38:00.152246', 'step': 20932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:00.186769', 'step': 20932, 'epoch': 3} {'type': 'loss', 'content': 0.12042967230081558, 'timestamp': '2025-09-30 22:38:00.190606', 'step': 20933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.234829', 'step': 20933, 'epoch': 3} {'type': 'loss', 'content': 0.07447696477174759, 'timestamp': '2025-09-30 22:38:00.245205', 'step': 20934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:00.288873', 'step': 20934, 'epoch': 3} {'type': 'loss', 'content': 0.04068148136138916, 'timestamp': '2025-09-30 22:38:00.306515', 'step': 20935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.343105', 'step': 20935, 'epoch': 3} {'type': 'loss', 'content': 0.08638325333595276, 'timestamp': '2025-09-30 22:38:00.375498', 'step': 20936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.414932', 'step': 20936, 'epoch': 3} {'type': 'loss', 'content': 0.053140994161367416, 'timestamp': '2025-09-30 22:38:00.429965', 'step': 20937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.466525', 'step': 20937, 'epoch': 3} {'type': 'loss', 'content': 0.05126744881272316, 'timestamp': '2025-09-30 22:38:00.471132', 'step': 20938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.509325', 'step': 20938, 'epoch': 3} {'type': 'loss', 'content': 0.04077748954296112, 'timestamp': '2025-09-30 22:38:00.513599', 'step': 20939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.547205', 'step': 20939, 'epoch': 3} {'type': 'loss', 'content': 0.1014183759689331, 'timestamp': '2025-09-30 22:38:00.585458', 'step': 20940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.623879', 'step': 20940, 'epoch': 3} {'type': 'loss', 'content': 0.020754504948854446, 'timestamp': '2025-09-30 22:38:00.628390', 'step': 20941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.678822', 'step': 20941, 'epoch': 3} {'type': 'loss', 'content': 0.050354719161987305, 'timestamp': '2025-09-30 22:38:00.686292', 'step': 20942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.720156', 'step': 20942, 'epoch': 3} {'type': 'loss', 'content': 0.05228417366743088, 'timestamp': '2025-09-30 22:38:00.725776', 'step': 20943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.764891', 'step': 20943, 'epoch': 3} {'type': 'loss', 'content': 0.12833520770072937, 'timestamp': '2025-09-30 22:38:00.800826', 'step': 20944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.836280', 'step': 20944, 'epoch': 3} {'type': 'loss', 'content': 0.046991512179374695, 'timestamp': '2025-09-30 22:38:00.841669', 'step': 20945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:00.917621', 'step': 20945, 'epoch': 3} {'type': 'loss', 'content': 0.06881585717201233, 'timestamp': '2025-09-30 22:38:00.920871', 'step': 20946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.960171', 'step': 20946, 'epoch': 3} {'type': 'loss', 'content': 0.019892748445272446, 'timestamp': '2025-09-30 22:38:00.964193', 'step': 20947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:00.996440', 'step': 20947, 'epoch': 3} {'type': 'loss', 'content': 0.10697125643491745, 'timestamp': '2025-09-30 22:38:01.024051', 'step': 20948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.062300', 'step': 20948, 'epoch': 3} {'type': 'loss', 'content': 0.10635577887296677, 'timestamp': '2025-09-30 22:38:01.081319', 'step': 20949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:01.126406', 'step': 20949, 'epoch': 3} {'type': 'loss', 'content': 0.09714936465024948, 'timestamp': '2025-09-30 22:38:01.141853', 'step': 20950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.187595', 'step': 20950, 'epoch': 3} {'type': 'loss', 'content': 0.06688982248306274, 'timestamp': '2025-09-30 22:38:01.192895', 'step': 20951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:01.225308', 'step': 20951, 'epoch': 3} {'type': 'loss', 'content': 0.06126497685909271, 'timestamp': '2025-09-30 22:38:01.260864', 'step': 20952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.301210', 'step': 20952, 'epoch': 3} {'type': 'loss', 'content': 0.04876793548464775, 'timestamp': '2025-09-30 22:38:01.314250', 'step': 20953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.348993', 'step': 20953, 'epoch': 3} {'type': 'loss', 'content': 0.11221190541982651, 'timestamp': '2025-09-30 22:38:01.354334', 'step': 20954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:01.398184', 'step': 20954, 'epoch': 3} {'type': 'loss', 'content': 0.13121181726455688, 'timestamp': '2025-09-30 22:38:01.411922', 'step': 20955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.464267', 'step': 20955, 'epoch': 3} {'type': 'loss', 'content': 0.12334150075912476, 'timestamp': '2025-09-30 22:38:01.505308', 'step': 20956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:01.540273', 'step': 20956, 'epoch': 3} {'type': 'loss', 'content': 0.02117704413831234, 'timestamp': '2025-09-30 22:38:01.554662', 'step': 20957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:01.605395', 'step': 20957, 'epoch': 3} {'type': 'loss', 'content': 0.05102820321917534, 'timestamp': '2025-09-30 22:38:01.618446', 'step': 20958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.650874', 'step': 20958, 'epoch': 3} {'type': 'loss', 'content': 0.08318372815847397, 'timestamp': '2025-09-30 22:38:01.655347', 'step': 20959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.688709', 'step': 20959, 'epoch': 3} {'type': 'loss', 'content': 0.04936879873275757, 'timestamp': '2025-09-30 22:38:01.722080', 'step': 20960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:01.766794', 'step': 20960, 'epoch': 3} {'type': 'loss', 'content': 0.09800982475280762, 'timestamp': '2025-09-30 22:38:01.779804', 'step': 20961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.820745', 'step': 20961, 'epoch': 3} {'type': 'loss', 'content': 0.013681825250387192, 'timestamp': '2025-09-30 22:38:01.825611', 'step': 20962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:01.858003', 'step': 20962, 'epoch': 3} {'type': 'loss', 'content': 0.08550059795379639, 'timestamp': '2025-09-30 22:38:01.862230', 'step': 20963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:01.896176', 'step': 20963, 'epoch': 3} {'type': 'loss', 'content': 0.032690200954675674, 'timestamp': '2025-09-30 22:38:01.921697', 'step': 20964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:01.956381', 'step': 20964, 'epoch': 3} {'type': 'loss', 'content': 0.07130594551563263, 'timestamp': '2025-09-30 22:38:01.959938', 'step': 20965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:01.992394', 'step': 20965, 'epoch': 3} {'type': 'loss', 'content': 0.058302536606788635, 'timestamp': '2025-09-30 22:38:01.996116', 'step': 20966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:02.027964', 'step': 20966, 'epoch': 3} {'type': 'loss', 'content': 0.035218581557273865, 'timestamp': '2025-09-30 22:38:02.032372', 'step': 20967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:02.068633', 'step': 20967, 'epoch': 3} {'type': 'loss', 'content': 0.04017970338463783, 'timestamp': '2025-09-30 22:38:02.093239', 'step': 20968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:02.126636', 'step': 20968, 'epoch': 3} {'type': 'loss', 'content': 0.050922196358442307, 'timestamp': '2025-09-30 22:38:02.131856', 'step': 20969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:02.165012', 'step': 20969, 'epoch': 3} {'type': 'loss', 'content': 0.030042732134461403, 'timestamp': '2025-09-30 22:38:02.168336', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:38:10.143476', 'step': 20970, 'epoch': 3} {'type': 'pplx', 'content': 9857.728686860955, 'timestamp': '2025-09-30 22:38:10.159305', 'step': 20970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:10.193915', 'step': 20970, 'epoch': 3} {'type': 'loss', 'content': 0.11650208383798599, 'timestamp': '2025-09-30 22:38:10.199607', 'step': 20971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:10.237162', 'step': 20971, 'epoch': 3} {'type': 'loss', 'content': 0.062302347272634506, 'timestamp': '2025-09-30 22:38:10.271728', 'step': 20972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:10.302937', 'step': 20972, 'epoch': 3} {'type': 'loss', 'content': 0.029206814244389534, 'timestamp': '2025-09-30 22:38:10.307304', 'step': 20973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:10.347325', 'step': 20973, 'epoch': 3} {'type': 'loss', 'content': 0.03216049447655678, 'timestamp': '2025-09-30 22:38:10.353748', 'step': 20974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:10.387881', 'step': 20974, 'epoch': 3} {'type': 'loss', 'content': 0.08489077538251877, 'timestamp': '2025-09-30 22:38:10.397512', 'step': 20975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:10.432986', 'step': 20975, 'epoch': 3} {'type': 'loss', 'content': 0.04068460315465927, 'timestamp': '2025-09-30 22:38:10.464544', 'step': 20976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:10.496172', 'step': 20976, 'epoch': 3} {'type': 'loss', 'content': 0.054407138377428055, 'timestamp': '2025-09-30 22:38:10.516456', 'step': 20977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:10.566382', 'step': 20977, 'epoch': 3} {'type': 'loss', 'content': 0.08256763964891434, 'timestamp': '2025-09-30 22:38:10.571466', 'step': 20978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:10.602691', 'step': 20978, 'epoch': 3} {'type': 'loss', 'content': 0.1326933652162552, 'timestamp': '2025-09-30 22:38:10.616083', 'step': 20979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:10.648160', 'step': 20979, 'epoch': 3} {'type': 'loss', 'content': 0.07395250350236893, 'timestamp': '2025-09-30 22:38:10.682230', 'step': 20980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:10.718994', 'step': 20980, 'epoch': 3} {'type': 'loss', 'content': 0.0631754994392395, 'timestamp': '2025-09-30 22:38:10.739855', 'step': 20981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:10.779988', 'step': 20981, 'epoch': 3} {'type': 'loss', 'content': 0.09782689809799194, 'timestamp': '2025-09-30 22:38:10.793291', 'step': 20982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:10.834904', 'step': 20982, 'epoch': 3} {'type': 'loss', 'content': 0.026661599054932594, 'timestamp': '2025-09-30 22:38:10.838234', 'step': 20983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:10.871958', 'step': 20983, 'epoch': 3} {'type': 'loss', 'content': 0.07286819815635681, 'timestamp': '2025-09-30 22:38:10.904245', 'step': 20984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:10.942352', 'step': 20984, 'epoch': 3} {'type': 'loss', 'content': 0.07902135699987411, 'timestamp': '2025-09-30 22:38:10.954532', 'step': 20985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:10.997941', 'step': 20985, 'epoch': 3} {'type': 'loss', 'content': 0.03944728150963783, 'timestamp': '2025-09-30 22:38:11.002545', 'step': 20986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:11.037382', 'step': 20986, 'epoch': 3} {'type': 'loss', 'content': 0.07902336120605469, 'timestamp': '2025-09-30 22:38:11.049540', 'step': 20987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:11.088864', 'step': 20987, 'epoch': 3} {'type': 'loss', 'content': 0.08135710656642914, 'timestamp': '2025-09-30 22:38:11.120381', 'step': 20988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:11.153402', 'step': 20988, 'epoch': 3} {'type': 'loss', 'content': 0.04615272954106331, 'timestamp': '2025-09-30 22:38:11.159417', 'step': 20989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:11.200328', 'step': 20989, 'epoch': 3} {'type': 'loss', 'content': 0.03576936200261116, 'timestamp': '2025-09-30 22:38:11.207012', 'step': 20990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:11.238561', 'step': 20990, 'epoch': 3} {'type': 'loss', 'content': 0.07910586148500443, 'timestamp': '2025-09-30 22:38:11.242855', 'step': 20991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:11.279120', 'step': 20991, 'epoch': 3} {'type': 'loss', 'content': 0.039456628262996674, 'timestamp': '2025-09-30 22:38:11.305604', 'step': 20992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:11.342807', 'step': 20992, 'epoch': 3} {'type': 'loss', 'content': 0.07090284675359726, 'timestamp': '2025-09-30 22:38:11.346899', 'step': 20993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:11.385451', 'step': 20993, 'epoch': 3} {'type': 'loss', 'content': 0.07257393002510071, 'timestamp': '2025-09-30 22:38:11.390813', 'step': 20994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:11.426143', 'step': 20994, 'epoch': 3} {'type': 'loss', 'content': 0.05984928086400032, 'timestamp': '2025-09-30 22:38:11.440825', 'step': 20995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:11.478021', 'step': 20995, 'epoch': 3} {'type': 'loss', 'content': 0.1002662256360054, 'timestamp': '2025-09-30 22:38:11.513362', 'step': 20996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:11.571303', 'step': 20996, 'epoch': 3} {'type': 'loss', 'content': 0.03262752667069435, 'timestamp': '2025-09-30 22:38:11.584176', 'step': 20997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:11.623553', 'step': 20997, 'epoch': 3} {'type': 'loss', 'content': 0.0490177683532238, 'timestamp': '2025-09-30 22:38:11.629321', 'step': 20998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:11.662697', 'step': 20998, 'epoch': 3} {'type': 'loss', 'content': 0.07750024646520615, 'timestamp': '2025-09-30 22:38:11.668822', 'step': 20999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:11.700099', 'step': 20999, 'epoch': 3} {'type': 'loss', 'content': 0.07052311301231384, 'timestamp': '2025-09-30 22:38:11.726217', 'step': 21000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21000', 'timestamp': '2025-09-30 22:38:16.627916', 'step': 21000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:16.674184', 'step': 21000, 'epoch': 3} {'type': 'loss', 'content': 0.10858342796564102, 'timestamp': '2025-09-30 22:38:16.678803', 'step': 21001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:16.712698', 'step': 21001, 'epoch': 3} {'type': 'loss', 'content': 0.05644100531935692, 'timestamp': '2025-09-30 22:38:16.716743', 'step': 21002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:16.749610', 'step': 21002, 'epoch': 3} {'type': 'loss', 'content': 0.0847766324877739, 'timestamp': '2025-09-30 22:38:16.753883', 'step': 21003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:16.789133', 'step': 21003, 'epoch': 3} {'type': 'loss', 'content': 0.06534582376480103, 'timestamp': '2025-09-30 22:38:16.815806', 'step': 21004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:16.846286', 'step': 21004, 'epoch': 3} {'type': 'loss', 'content': 0.06352679431438446, 'timestamp': '2025-09-30 22:38:16.850601', 'step': 21005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:16.883509', 'step': 21005, 'epoch': 3} {'type': 'loss', 'content': 0.07942676544189453, 'timestamp': '2025-09-30 22:38:16.890229', 'step': 21006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:16.930659', 'step': 21006, 'epoch': 3} {'type': 'loss', 'content': 0.07406558096408844, 'timestamp': '2025-09-30 22:38:16.936437', 'step': 21007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:16.968554', 'step': 21007, 'epoch': 3} {'type': 'loss', 'content': 0.07974778115749359, 'timestamp': '2025-09-30 22:38:16.995290', 'step': 21008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.027839', 'step': 21008, 'epoch': 3} {'type': 'loss', 'content': 0.026693910360336304, 'timestamp': '2025-09-30 22:38:17.032468', 'step': 21009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.075400', 'step': 21009, 'epoch': 3} {'type': 'loss', 'content': 0.04094511643052101, 'timestamp': '2025-09-30 22:38:17.089415', 'step': 21010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:17.129492', 'step': 21010, 'epoch': 3} {'type': 'loss', 'content': 0.06385749578475952, 'timestamp': '2025-09-30 22:38:17.135179', 'step': 21011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:17.167859', 'step': 21011, 'epoch': 3} {'type': 'loss', 'content': 0.09288521856069565, 'timestamp': '2025-09-30 22:38:17.194733', 'step': 21012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.240782', 'step': 21012, 'epoch': 3} {'type': 'loss', 'content': 0.0871785581111908, 'timestamp': '2025-09-30 22:38:17.243992', 'step': 21013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.289857', 'step': 21013, 'epoch': 3} {'type': 'loss', 'content': 0.06721281260251999, 'timestamp': '2025-09-30 22:38:17.306665', 'step': 21014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.338535', 'step': 21014, 'epoch': 3} {'type': 'loss', 'content': 0.12436419725418091, 'timestamp': '2025-09-30 22:38:17.344426', 'step': 21015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.375590', 'step': 21015, 'epoch': 3} {'type': 'loss', 'content': 0.08819101750850677, 'timestamp': '2025-09-30 22:38:17.407258', 'step': 21016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:17.451617', 'step': 21016, 'epoch': 3} {'type': 'loss', 'content': 0.0879216268658638, 'timestamp': '2025-09-30 22:38:17.458541', 'step': 21017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.497285', 'step': 21017, 'epoch': 3} {'type': 'loss', 'content': 0.08163780719041824, 'timestamp': '2025-09-30 22:38:17.500914', 'step': 21018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:17.535824', 'step': 21018, 'epoch': 3} {'type': 'loss', 'content': 0.08240987360477448, 'timestamp': '2025-09-30 22:38:17.539052', 'step': 21019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:17.571628', 'step': 21019, 'epoch': 3} {'type': 'loss', 'content': 0.046315934509038925, 'timestamp': '2025-09-30 22:38:17.598348', 'step': 21020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:17.631281', 'step': 21020, 'epoch': 3} {'type': 'loss', 'content': 0.05666615813970566, 'timestamp': '2025-09-30 22:38:17.635200', 'step': 21021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.670385', 'step': 21021, 'epoch': 3} {'type': 'loss', 'content': 0.049889806658029556, 'timestamp': '2025-09-30 22:38:17.677137', 'step': 21022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:17.710830', 'step': 21022, 'epoch': 3} {'type': 'loss', 'content': 0.09623131155967712, 'timestamp': '2025-09-30 22:38:17.716599', 'step': 21023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:17.761874', 'step': 21023, 'epoch': 3} {'type': 'loss', 'content': 0.052039701491594315, 'timestamp': '2025-09-30 22:38:17.788583', 'step': 21024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:17.820820', 'step': 21024, 'epoch': 3} {'type': 'loss', 'content': 0.05989855155348778, 'timestamp': '2025-09-30 22:38:17.825464', 'step': 21025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:17.871924', 'step': 21025, 'epoch': 3} {'type': 'loss', 'content': 0.11681367456912994, 'timestamp': '2025-09-30 22:38:17.875979', 'step': 21026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:17.908044', 'step': 21026, 'epoch': 3} {'type': 'loss', 'content': 0.13082724809646606, 'timestamp': '2025-09-30 22:38:17.921832', 'step': 21027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:17.965519', 'step': 21027, 'epoch': 3} {'type': 'loss', 'content': 0.07427772134542465, 'timestamp': '2025-09-30 22:38:17.993314', 'step': 21028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:18.025143', 'step': 21028, 'epoch': 3} {'type': 'loss', 'content': 0.07379168272018433, 'timestamp': '2025-09-30 22:38:18.029447', 'step': 21029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.062700', 'step': 21029, 'epoch': 3} {'type': 'loss', 'content': 0.05271272361278534, 'timestamp': '2025-09-30 22:38:18.081483', 'step': 21030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.113101', 'step': 21030, 'epoch': 3} {'type': 'loss', 'content': 0.06740349531173706, 'timestamp': '2025-09-30 22:38:18.130511', 'step': 21031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:18.175329', 'step': 21031, 'epoch': 3} {'type': 'loss', 'content': 0.10665637254714966, 'timestamp': '2025-09-30 22:38:18.202368', 'step': 21032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.247546', 'step': 21032, 'epoch': 3} {'type': 'loss', 'content': 0.06287795305252075, 'timestamp': '2025-09-30 22:38:18.253042', 'step': 21033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.284737', 'step': 21033, 'epoch': 3} {'type': 'loss', 'content': 0.08584734052419662, 'timestamp': '2025-09-30 22:38:18.291412', 'step': 21034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:18.325475', 'step': 21034, 'epoch': 3} {'type': 'loss', 'content': 0.056976962834596634, 'timestamp': '2025-09-30 22:38:18.331193', 'step': 21035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.366052', 'step': 21035, 'epoch': 3} {'type': 'loss', 'content': 0.06266871094703674, 'timestamp': '2025-09-30 22:38:18.391915', 'step': 21036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:18.424827', 'step': 21036, 'epoch': 3} {'type': 'loss', 'content': 0.051238469779491425, 'timestamp': '2025-09-30 22:38:18.445661', 'step': 21037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:18.491902', 'step': 21037, 'epoch': 3} {'type': 'loss', 'content': 0.032508064061403275, 'timestamp': '2025-09-30 22:38:18.496515', 'step': 21038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:18.541198', 'step': 21038, 'epoch': 3} {'type': 'loss', 'content': 0.03137943893671036, 'timestamp': '2025-09-30 22:38:18.546645', 'step': 21039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:18.589462', 'step': 21039, 'epoch': 3} {'type': 'loss', 'content': 0.10405729711055756, 'timestamp': '2025-09-30 22:38:18.616987', 'step': 21040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:18.654490', 'step': 21040, 'epoch': 3} {'type': 'loss', 'content': 0.0630878433585167, 'timestamp': '2025-09-30 22:38:18.659659', 'step': 21041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:18.693175', 'step': 21041, 'epoch': 3} {'type': 'loss', 'content': 0.11578855663537979, 'timestamp': '2025-09-30 22:38:18.708451', 'step': 21042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.742313', 'step': 21042, 'epoch': 3} {'type': 'loss', 'content': 0.08545450866222382, 'timestamp': '2025-09-30 22:38:18.747549', 'step': 21043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.782727', 'step': 21043, 'epoch': 3} {'type': 'loss', 'content': 0.05936340242624283, 'timestamp': '2025-09-30 22:38:18.808046', 'step': 21044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:18.850264', 'step': 21044, 'epoch': 3} {'type': 'loss', 'content': 0.10927680879831314, 'timestamp': '2025-09-30 22:38:18.854609', 'step': 21045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:18.886471', 'step': 21045, 'epoch': 3} {'type': 'loss', 'content': 0.10589983314275742, 'timestamp': '2025-09-30 22:38:18.891163', 'step': 21046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:18.925789', 'step': 21046, 'epoch': 3} {'type': 'loss', 'content': 0.031191879883408546, 'timestamp': '2025-09-30 22:38:18.945794', 'step': 21047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:18.980710', 'step': 21047, 'epoch': 3} {'type': 'loss', 'content': 0.07369053363800049, 'timestamp': '2025-09-30 22:38:19.019368', 'step': 21048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.065121', 'step': 21048, 'epoch': 3} {'type': 'loss', 'content': 0.06642090529203415, 'timestamp': '2025-09-30 22:38:19.071227', 'step': 21049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.104922', 'step': 21049, 'epoch': 3} {'type': 'loss', 'content': 0.06148851290345192, 'timestamp': '2025-09-30 22:38:19.119896', 'step': 21050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.164049', 'step': 21050, 'epoch': 3} {'type': 'loss', 'content': 0.05734676122665405, 'timestamp': '2025-09-30 22:38:19.168365', 'step': 21051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.201906', 'step': 21051, 'epoch': 3} {'type': 'loss', 'content': 0.07661180198192596, 'timestamp': '2025-09-30 22:38:19.237585', 'step': 21052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:19.270172', 'step': 21052, 'epoch': 3} {'type': 'loss', 'content': 0.09336893260478973, 'timestamp': '2025-09-30 22:38:19.279872', 'step': 21053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:19.313168', 'step': 21053, 'epoch': 3} {'type': 'loss', 'content': 0.07750409096479416, 'timestamp': '2025-09-30 22:38:19.318739', 'step': 21054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.350733', 'step': 21054, 'epoch': 3} {'type': 'loss', 'content': 0.12594634294509888, 'timestamp': '2025-09-30 22:38:19.359903', 'step': 21055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:19.398745', 'step': 21055, 'epoch': 3} {'type': 'loss', 'content': 0.030381206423044205, 'timestamp': '2025-09-30 22:38:19.424900', 'step': 21056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:19.466988', 'step': 21056, 'epoch': 3} {'type': 'loss', 'content': 0.10717298835515976, 'timestamp': '2025-09-30 22:38:19.469685', 'step': 21057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.504495', 'step': 21057, 'epoch': 3} {'type': 'loss', 'content': 0.14453727006912231, 'timestamp': '2025-09-30 22:38:19.509303', 'step': 21058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.549556', 'step': 21058, 'epoch': 3} {'type': 'loss', 'content': 0.10289838165044785, 'timestamp': '2025-09-30 22:38:19.554282', 'step': 21059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.591415', 'step': 21059, 'epoch': 3} {'type': 'loss', 'content': 0.03173993527889252, 'timestamp': '2025-09-30 22:38:19.625590', 'step': 21060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.656680', 'step': 21060, 'epoch': 3} {'type': 'loss', 'content': 0.015150496736168861, 'timestamp': '2025-09-30 22:38:19.674432', 'step': 21061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.710082', 'step': 21061, 'epoch': 3} {'type': 'loss', 'content': 0.07722274959087372, 'timestamp': '2025-09-30 22:38:19.713981', 'step': 21062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.755333', 'step': 21062, 'epoch': 3} {'type': 'loss', 'content': 0.03597210720181465, 'timestamp': '2025-09-30 22:38:19.759278', 'step': 21063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.792164', 'step': 21063, 'epoch': 3} {'type': 'loss', 'content': 0.05779372155666351, 'timestamp': '2025-09-30 22:38:19.818272', 'step': 21064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.851437', 'step': 21064, 'epoch': 3} {'type': 'loss', 'content': 0.14258134365081787, 'timestamp': '2025-09-30 22:38:19.855639', 'step': 21065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.900688', 'step': 21065, 'epoch': 3} {'type': 'loss', 'content': 0.04831904545426369, 'timestamp': '2025-09-30 22:38:19.915045', 'step': 21066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:19.948454', 'step': 21066, 'epoch': 3} {'type': 'loss', 'content': 0.09575945138931274, 'timestamp': '2025-09-30 22:38:19.961853', 'step': 21067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:19.995919', 'step': 21067, 'epoch': 3} {'type': 'loss', 'content': 0.07167482376098633, 'timestamp': '2025-09-30 22:38:20.021504', 'step': 21068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.054443', 'step': 21068, 'epoch': 3} {'type': 'loss', 'content': 0.08085386455059052, 'timestamp': '2025-09-30 22:38:20.059258', 'step': 21069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:20.094268', 'step': 21069, 'epoch': 3} {'type': 'loss', 'content': 0.0833921879529953, 'timestamp': '2025-09-30 22:38:20.099465', 'step': 21070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.133138', 'step': 21070, 'epoch': 3} {'type': 'loss', 'content': 0.04163592308759689, 'timestamp': '2025-09-30 22:38:20.150803', 'step': 21071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:20.193886', 'step': 21071, 'epoch': 3} {'type': 'loss', 'content': 0.04507036879658699, 'timestamp': '2025-09-30 22:38:20.220622', 'step': 21072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.251650', 'step': 21072, 'epoch': 3} {'type': 'loss', 'content': 0.10571816563606262, 'timestamp': '2025-09-30 22:38:20.265629', 'step': 21073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:20.307460', 'step': 21073, 'epoch': 3} {'type': 'loss', 'content': 0.05460396781563759, 'timestamp': '2025-09-30 22:38:20.313534', 'step': 21074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.346961', 'step': 21074, 'epoch': 3} {'type': 'loss', 'content': 0.08543508499860764, 'timestamp': '2025-09-30 22:38:20.359383', 'step': 21075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.391245', 'step': 21075, 'epoch': 3} {'type': 'loss', 'content': 0.13252541422843933, 'timestamp': '2025-09-30 22:38:20.428054', 'step': 21076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:20.462613', 'step': 21076, 'epoch': 3} {'type': 'loss', 'content': 0.11539492011070251, 'timestamp': '2025-09-30 22:38:20.467074', 'step': 21077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:20.499398', 'step': 21077, 'epoch': 3} {'type': 'loss', 'content': 0.1396915167570114, 'timestamp': '2025-09-30 22:38:20.503338', 'step': 21078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.536191', 'step': 21078, 'epoch': 3} {'type': 'loss', 'content': 0.0411001555621624, 'timestamp': '2025-09-30 22:38:20.540620', 'step': 21079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.573174', 'step': 21079, 'epoch': 3} {'type': 'loss', 'content': 0.05782691761851311, 'timestamp': '2025-09-30 22:38:20.599114', 'step': 21080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.642410', 'step': 21080, 'epoch': 3} {'type': 'loss', 'content': 0.048043206334114075, 'timestamp': '2025-09-30 22:38:20.648260', 'step': 21081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.682283', 'step': 21081, 'epoch': 3} {'type': 'loss', 'content': 0.10223691910505295, 'timestamp': '2025-09-30 22:38:20.693234', 'step': 21082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.735821', 'step': 21082, 'epoch': 3} {'type': 'loss', 'content': 0.057424359023571014, 'timestamp': '2025-09-30 22:38:20.739683', 'step': 21083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.783433', 'step': 21083, 'epoch': 3} {'type': 'loss', 'content': 0.0764402374625206, 'timestamp': '2025-09-30 22:38:20.817183', 'step': 21084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:20.859592', 'step': 21084, 'epoch': 3} {'type': 'loss', 'content': 0.14203424751758575, 'timestamp': '2025-09-30 22:38:20.872232', 'step': 21085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.908220', 'step': 21085, 'epoch': 3} {'type': 'loss', 'content': 0.036173902451992035, 'timestamp': '2025-09-30 22:38:20.917746', 'step': 21086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:20.958591', 'step': 21086, 'epoch': 3} {'type': 'loss', 'content': 0.06877130270004272, 'timestamp': '2025-09-30 22:38:20.961801', 'step': 21087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:20.999419', 'step': 21087, 'epoch': 3} {'type': 'loss', 'content': 0.0494057834148407, 'timestamp': '2025-09-30 22:38:21.025865', 'step': 21088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.061754', 'step': 21088, 'epoch': 3} {'type': 'loss', 'content': 0.06805159151554108, 'timestamp': '2025-09-30 22:38:21.066115', 'step': 21089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.108784', 'step': 21089, 'epoch': 3} {'type': 'loss', 'content': 0.05598152428865433, 'timestamp': '2025-09-30 22:38:21.115127', 'step': 21090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:21.162010', 'step': 21090, 'epoch': 3} {'type': 'loss', 'content': 0.06964308768510818, 'timestamp': '2025-09-30 22:38:21.165646', 'step': 21091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.199175', 'step': 21091, 'epoch': 3} {'type': 'loss', 'content': 0.06345143914222717, 'timestamp': '2025-09-30 22:38:21.223108', 'step': 21092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.255264', 'step': 21092, 'epoch': 3} {'type': 'loss', 'content': 0.020669784396886826, 'timestamp': '2025-09-30 22:38:21.260902', 'step': 21093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:21.293662', 'step': 21093, 'epoch': 3} {'type': 'loss', 'content': 0.09565488994121552, 'timestamp': '2025-09-30 22:38:21.298162', 'step': 21094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:21.341673', 'step': 21094, 'epoch': 3} {'type': 'loss', 'content': 0.031580157577991486, 'timestamp': '2025-09-30 22:38:21.346480', 'step': 21095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.379958', 'step': 21095, 'epoch': 3} {'type': 'loss', 'content': 0.06698399782180786, 'timestamp': '2025-09-30 22:38:21.407280', 'step': 21096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:21.443896', 'step': 21096, 'epoch': 3} {'type': 'loss', 'content': 0.10659554600715637, 'timestamp': '2025-09-30 22:38:21.447858', 'step': 21097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.480015', 'step': 21097, 'epoch': 3} {'type': 'loss', 'content': 0.10364710539579391, 'timestamp': '2025-09-30 22:38:21.486962', 'step': 21098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:21.519844', 'step': 21098, 'epoch': 3} {'type': 'loss', 'content': 0.020371651276946068, 'timestamp': '2025-09-30 22:38:21.522707', 'step': 21099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:21.563268', 'step': 21099, 'epoch': 3} {'type': 'loss', 'content': 0.09577567130327225, 'timestamp': '2025-09-30 22:38:21.588898', 'step': 21100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:21.620976', 'step': 21100, 'epoch': 3} {'type': 'loss', 'content': 0.025816306471824646, 'timestamp': '2025-09-30 22:38:21.625344', 'step': 21101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:21.671507', 'step': 21101, 'epoch': 3} {'type': 'loss', 'content': 0.05621309205889702, 'timestamp': '2025-09-30 22:38:21.675494', 'step': 21102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.714899', 'step': 21102, 'epoch': 3} {'type': 'loss', 'content': 0.10341880470514297, 'timestamp': '2025-09-30 22:38:21.718417', 'step': 21103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.749692', 'step': 21103, 'epoch': 3} {'type': 'loss', 'content': 0.04813719913363457, 'timestamp': '2025-09-30 22:38:21.782369', 'step': 21104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:21.815960', 'step': 21104, 'epoch': 3} {'type': 'loss', 'content': 0.06703875958919525, 'timestamp': '2025-09-30 22:38:21.819344', 'step': 21105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:21.857438', 'step': 21105, 'epoch': 3} {'type': 'loss', 'content': 0.04714055359363556, 'timestamp': '2025-09-30 22:38:21.862493', 'step': 21106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.895169', 'step': 21106, 'epoch': 3} {'type': 'loss', 'content': 0.06490115821361542, 'timestamp': '2025-09-30 22:38:21.900695', 'step': 21107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:21.933322', 'step': 21107, 'epoch': 3} {'type': 'loss', 'content': 0.04503485932946205, 'timestamp': '2025-09-30 22:38:21.957552', 'step': 21108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:22.008171', 'step': 21108, 'epoch': 3} {'type': 'loss', 'content': 0.019962217658758163, 'timestamp': '2025-09-30 22:38:22.015335', 'step': 21109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:22.051144', 'step': 21109, 'epoch': 3} {'type': 'loss', 'content': 0.17990951240062714, 'timestamp': '2025-09-30 22:38:22.057660', 'step': 21110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:22.090268', 'step': 21110, 'epoch': 3} {'type': 'loss', 'content': 0.041277773678302765, 'timestamp': '2025-09-30 22:38:22.095675', 'step': 21111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:22.132876', 'step': 21111, 'epoch': 3} {'type': 'loss', 'content': 0.08846371620893478, 'timestamp': '2025-09-30 22:38:22.160974', 'step': 21112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:22.192322', 'step': 21112, 'epoch': 3} {'type': 'loss', 'content': 0.08718691766262054, 'timestamp': '2025-09-30 22:38:22.206549', 'step': 21113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.248740', 'step': 21113, 'epoch': 3} {'type': 'loss', 'content': 0.15740437805652618, 'timestamp': '2025-09-30 22:38:22.265028', 'step': 21114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:22.296771', 'step': 21114, 'epoch': 3} {'type': 'loss', 'content': 0.07946678251028061, 'timestamp': '2025-09-30 22:38:22.300052', 'step': 21115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.333346', 'step': 21115, 'epoch': 3} {'type': 'loss', 'content': 0.045185718685388565, 'timestamp': '2025-09-30 22:38:22.357756', 'step': 21116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.390161', 'step': 21116, 'epoch': 3} {'type': 'loss', 'content': 0.09275621920824051, 'timestamp': '2025-09-30 22:38:22.394605', 'step': 21117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:22.426289', 'step': 21117, 'epoch': 3} {'type': 'loss', 'content': 0.024865811690688133, 'timestamp': '2025-09-30 22:38:22.430440', 'step': 21118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.463528', 'step': 21118, 'epoch': 3} {'type': 'loss', 'content': 0.06425091624259949, 'timestamp': '2025-09-30 22:38:22.478686', 'step': 21119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:22.511322', 'step': 21119, 'epoch': 3} {'type': 'loss', 'content': 0.02240118570625782, 'timestamp': '2025-09-30 22:38:22.537432', 'step': 21120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.570157', 'step': 21120, 'epoch': 3} {'type': 'loss', 'content': 0.12139427661895752, 'timestamp': '2025-09-30 22:38:22.575343', 'step': 21121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:22.613674', 'step': 21121, 'epoch': 3} {'type': 'loss', 'content': 0.05319923534989357, 'timestamp': '2025-09-30 22:38:22.619387', 'step': 21122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.652006', 'step': 21122, 'epoch': 3} {'type': 'loss', 'content': 0.05304206162691116, 'timestamp': '2025-09-30 22:38:22.656415', 'step': 21123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:22.689198', 'step': 21123, 'epoch': 3} {'type': 'loss', 'content': 0.08734547346830368, 'timestamp': '2025-09-30 22:38:22.717200', 'step': 21124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:22.750499', 'step': 21124, 'epoch': 3} {'type': 'loss', 'content': 0.05295426771044731, 'timestamp': '2025-09-30 22:38:22.757701', 'step': 21125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:22.791680', 'step': 21125, 'epoch': 3} {'type': 'loss', 'content': 0.12025853246450424, 'timestamp': '2025-09-30 22:38:22.796559', 'step': 21126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:22.829595', 'step': 21126, 'epoch': 3} {'type': 'loss', 'content': 0.09423647820949554, 'timestamp': '2025-09-30 22:38:22.833307', 'step': 21127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:22.874586', 'step': 21127, 'epoch': 3} {'type': 'loss', 'content': 0.053724076598882675, 'timestamp': '2025-09-30 22:38:22.900102', 'step': 21128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:22.939805', 'step': 21128, 'epoch': 3} {'type': 'loss', 'content': 0.0860130563378334, 'timestamp': '2025-09-30 22:38:22.943019', 'step': 21129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:22.986582', 'step': 21129, 'epoch': 3} {'type': 'loss', 'content': 0.11413171887397766, 'timestamp': '2025-09-30 22:38:22.990441', 'step': 21130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.030159', 'step': 21130, 'epoch': 3} {'type': 'loss', 'content': 0.11473855376243591, 'timestamp': '2025-09-30 22:38:23.045139', 'step': 21131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:23.080323', 'step': 21131, 'epoch': 3} {'type': 'loss', 'content': 0.08222699165344238, 'timestamp': '2025-09-30 22:38:23.107107', 'step': 21132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.139739', 'step': 21132, 'epoch': 3} {'type': 'loss', 'content': 0.09061422944068909, 'timestamp': '2025-09-30 22:38:23.144640', 'step': 21133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:23.190992', 'step': 21133, 'epoch': 3} {'type': 'loss', 'content': 0.07454840838909149, 'timestamp': '2025-09-30 22:38:23.195537', 'step': 21134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.229453', 'step': 21134, 'epoch': 3} {'type': 'loss', 'content': 0.02363862469792366, 'timestamp': '2025-09-30 22:38:23.234906', 'step': 21135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:23.277086', 'step': 21135, 'epoch': 3} {'type': 'loss', 'content': 0.043775156140327454, 'timestamp': '2025-09-30 22:38:23.302942', 'step': 21136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.336271', 'step': 21136, 'epoch': 3} {'type': 'loss', 'content': 0.09242437779903412, 'timestamp': '2025-09-30 22:38:23.341220', 'step': 21137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:23.374668', 'step': 21137, 'epoch': 3} {'type': 'loss', 'content': 0.1046435683965683, 'timestamp': '2025-09-30 22:38:23.380337', 'step': 21138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:23.425554', 'step': 21138, 'epoch': 3} {'type': 'loss', 'content': 0.0905250757932663, 'timestamp': '2025-09-30 22:38:23.430320', 'step': 21139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:23.473814', 'step': 21139, 'epoch': 3} {'type': 'loss', 'content': 0.08491123467683792, 'timestamp': '2025-09-30 22:38:23.510132', 'step': 21140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.543534', 'step': 21140, 'epoch': 3} {'type': 'loss', 'content': 0.04616463929414749, 'timestamp': '2025-09-30 22:38:23.549450', 'step': 21141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:23.588038', 'step': 21141, 'epoch': 3} {'type': 'loss', 'content': 0.03809763491153717, 'timestamp': '2025-09-30 22:38:23.605548', 'step': 21142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.648709', 'step': 21142, 'epoch': 3} {'type': 'loss', 'content': 0.02674064226448536, 'timestamp': '2025-09-30 22:38:23.653724', 'step': 21143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.687317', 'step': 21143, 'epoch': 3} {'type': 'loss', 'content': 0.07066559046506882, 'timestamp': '2025-09-30 22:38:23.715633', 'step': 21144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:23.748200', 'step': 21144, 'epoch': 3} {'type': 'loss', 'content': 0.018784884363412857, 'timestamp': '2025-09-30 22:38:23.752173', 'step': 21145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:23.794267', 'step': 21145, 'epoch': 3} {'type': 'loss', 'content': 0.05552879348397255, 'timestamp': '2025-09-30 22:38:23.800225', 'step': 21146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.833600', 'step': 21146, 'epoch': 3} {'type': 'loss', 'content': 0.08584253489971161, 'timestamp': '2025-09-30 22:38:23.839471', 'step': 21147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:23.872986', 'step': 21147, 'epoch': 3} {'type': 'loss', 'content': 0.08430737257003784, 'timestamp': '2025-09-30 22:38:23.899208', 'step': 21148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.931872', 'step': 21148, 'epoch': 3} {'type': 'loss', 'content': 0.03852321207523346, 'timestamp': '2025-09-30 22:38:23.935903', 'step': 21149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:23.976073', 'step': 21149, 'epoch': 3} {'type': 'loss', 'content': 0.06445493549108505, 'timestamp': '2025-09-30 22:38:23.980831', 'step': 21150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:24.023338', 'step': 21150, 'epoch': 3} {'type': 'loss', 'content': 0.033924538642168045, 'timestamp': '2025-09-30 22:38:24.026839', 'step': 21151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:24.069894', 'step': 21151, 'epoch': 3} {'type': 'loss', 'content': 0.11585955321788788, 'timestamp': '2025-09-30 22:38:24.096170', 'step': 21152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.129959', 'step': 21152, 'epoch': 3} {'type': 'loss', 'content': 0.04398180916905403, 'timestamp': '2025-09-30 22:38:24.135362', 'step': 21153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.176751', 'step': 21153, 'epoch': 3} {'type': 'loss', 'content': 0.04383009672164917, 'timestamp': '2025-09-30 22:38:24.181548', 'step': 21154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:24.225482', 'step': 21154, 'epoch': 3} {'type': 'loss', 'content': 0.06273532658815384, 'timestamp': '2025-09-30 22:38:24.229622', 'step': 21155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.270911', 'step': 21155, 'epoch': 3} {'type': 'loss', 'content': 0.03840944916009903, 'timestamp': '2025-09-30 22:38:24.296527', 'step': 21156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:24.328441', 'step': 21156, 'epoch': 3} {'type': 'loss', 'content': 0.06621116399765015, 'timestamp': '2025-09-30 22:38:24.344231', 'step': 21157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:24.385065', 'step': 21157, 'epoch': 3} {'type': 'loss', 'content': 0.06086832657456398, 'timestamp': '2025-09-30 22:38:24.389598', 'step': 21158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.423737', 'step': 21158, 'epoch': 3} {'type': 'loss', 'content': 0.07667312026023865, 'timestamp': '2025-09-30 22:38:24.427924', 'step': 21159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:24.459454', 'step': 21159, 'epoch': 3} {'type': 'loss', 'content': 0.06806544214487076, 'timestamp': '2025-09-30 22:38:24.497031', 'step': 21160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:24.530088', 'step': 21160, 'epoch': 3} {'type': 'loss', 'content': 0.046757277101278305, 'timestamp': '2025-09-30 22:38:24.543240', 'step': 21161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:24.577419', 'step': 21161, 'epoch': 3} {'type': 'loss', 'content': 0.14325611293315887, 'timestamp': '2025-09-30 22:38:24.582162', 'step': 21162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:24.614267', 'step': 21162, 'epoch': 3} {'type': 'loss', 'content': 0.07567096501588821, 'timestamp': '2025-09-30 22:38:24.619416', 'step': 21163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:24.653517', 'step': 21163, 'epoch': 3} {'type': 'loss', 'content': 0.04146130383014679, 'timestamp': '2025-09-30 22:38:24.689580', 'step': 21164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:24.721963', 'step': 21164, 'epoch': 3} {'type': 'loss', 'content': 0.028825832530856133, 'timestamp': '2025-09-30 22:38:24.726426', 'step': 21165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:24.759265', 'step': 21165, 'epoch': 3} {'type': 'loss', 'content': 0.032986778765916824, 'timestamp': '2025-09-30 22:38:24.763239', 'step': 21166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:24.796346', 'step': 21166, 'epoch': 3} {'type': 'loss', 'content': 0.07462823390960693, 'timestamp': '2025-09-30 22:38:24.811903', 'step': 21167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.843667', 'step': 21167, 'epoch': 3} {'type': 'loss', 'content': 0.04915086179971695, 'timestamp': '2025-09-30 22:38:24.876399', 'step': 21168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:24.911796', 'step': 21168, 'epoch': 3} {'type': 'loss', 'content': 0.08042646199464798, 'timestamp': '2025-09-30 22:38:24.915648', 'step': 21169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.947461', 'step': 21169, 'epoch': 3} {'type': 'loss', 'content': 0.09849625080823898, 'timestamp': '2025-09-30 22:38:24.952890', 'step': 21170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:24.986953', 'step': 21170, 'epoch': 3} {'type': 'loss', 'content': 0.028557071462273598, 'timestamp': '2025-09-30 22:38:24.991329', 'step': 21171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:25.031050', 'step': 21171, 'epoch': 3} {'type': 'loss', 'content': 0.04627487063407898, 'timestamp': '2025-09-30 22:38:25.055642', 'step': 21172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:25.090296', 'step': 21172, 'epoch': 3} {'type': 'loss', 'content': 0.04031546413898468, 'timestamp': '2025-09-30 22:38:25.094747', 'step': 21173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.126920', 'step': 21173, 'epoch': 3} {'type': 'loss', 'content': 0.17500516772270203, 'timestamp': '2025-09-30 22:38:25.131679', 'step': 21174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:25.164787', 'step': 21174, 'epoch': 3} {'type': 'loss', 'content': 0.0481550507247448, 'timestamp': '2025-09-30 22:38:25.170335', 'step': 21175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:25.203353', 'step': 21175, 'epoch': 3} {'type': 'loss', 'content': 0.03239339217543602, 'timestamp': '2025-09-30 22:38:25.228444', 'step': 21176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:25.259951', 'step': 21176, 'epoch': 3} {'type': 'loss', 'content': 0.04889697954058647, 'timestamp': '2025-09-30 22:38:25.271221', 'step': 21177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:25.312171', 'step': 21177, 'epoch': 3} {'type': 'loss', 'content': 0.05610692501068115, 'timestamp': '2025-09-30 22:38:25.315389', 'step': 21178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:25.348755', 'step': 21178, 'epoch': 3} {'type': 'loss', 'content': 0.12412644922733307, 'timestamp': '2025-09-30 22:38:25.355775', 'step': 21179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.388835', 'step': 21179, 'epoch': 3} {'type': 'loss', 'content': 0.05098562687635422, 'timestamp': '2025-09-30 22:38:25.413096', 'step': 21180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.445814', 'step': 21180, 'epoch': 3} {'type': 'loss', 'content': 0.017520885914564133, 'timestamp': '2025-09-30 22:38:25.459721', 'step': 21181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.498317', 'step': 21181, 'epoch': 3} {'type': 'loss', 'content': 0.030167831107974052, 'timestamp': '2025-09-30 22:38:25.502056', 'step': 21182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:25.534527', 'step': 21182, 'epoch': 3} {'type': 'loss', 'content': 0.03918793424963951, 'timestamp': '2025-09-30 22:38:25.539680', 'step': 21183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:25.572677', 'step': 21183, 'epoch': 3} {'type': 'loss', 'content': 0.08137921243906021, 'timestamp': '2025-09-30 22:38:25.597381', 'step': 21184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.630529', 'step': 21184, 'epoch': 3} {'type': 'loss', 'content': 0.047757845371961594, 'timestamp': '2025-09-30 22:38:25.635275', 'step': 21185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:25.669331', 'step': 21185, 'epoch': 3} {'type': 'loss', 'content': 0.11256251484155655, 'timestamp': '2025-09-30 22:38:25.673462', 'step': 21186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.704981', 'step': 21186, 'epoch': 3} {'type': 'loss', 'content': 0.1375916451215744, 'timestamp': '2025-09-30 22:38:25.727244', 'step': 21187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.760370', 'step': 21187, 'epoch': 3} {'type': 'loss', 'content': 0.06566516309976578, 'timestamp': '2025-09-30 22:38:25.799942', 'step': 21188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.832226', 'step': 21188, 'epoch': 3} {'type': 'loss', 'content': 0.10940438508987427, 'timestamp': '2025-09-30 22:38:25.837917', 'step': 21189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:25.869764', 'step': 21189, 'epoch': 3} {'type': 'loss', 'content': 0.06698116660118103, 'timestamp': '2025-09-30 22:38:25.873662', 'step': 21190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:25.913822', 'step': 21190, 'epoch': 3} {'type': 'loss', 'content': 0.07527842372655869, 'timestamp': '2025-09-30 22:38:25.917113', 'step': 21191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:25.959786', 'step': 21191, 'epoch': 3} {'type': 'loss', 'content': 0.03988340124487877, 'timestamp': '2025-09-30 22:38:25.984561', 'step': 21192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.018939', 'step': 21192, 'epoch': 3} {'type': 'loss', 'content': 0.041356850415468216, 'timestamp': '2025-09-30 22:38:26.033384', 'step': 21193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:26.071925', 'step': 21193, 'epoch': 3} {'type': 'loss', 'content': 0.09546675533056259, 'timestamp': '2025-09-30 22:38:26.076775', 'step': 21194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:26.118317', 'step': 21194, 'epoch': 3} {'type': 'loss', 'content': 0.051440007984638214, 'timestamp': '2025-09-30 22:38:26.122230', 'step': 21195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.155772', 'step': 21195, 'epoch': 3} {'type': 'loss', 'content': 0.14089784026145935, 'timestamp': '2025-09-30 22:38:26.189976', 'step': 21196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.224510', 'step': 21196, 'epoch': 3} {'type': 'loss', 'content': 0.029296718537807465, 'timestamp': '2025-09-30 22:38:26.235534', 'step': 21197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.268239', 'step': 21197, 'epoch': 3} {'type': 'loss', 'content': 0.09091399610042572, 'timestamp': '2025-09-30 22:38:26.272486', 'step': 21198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.305746', 'step': 21198, 'epoch': 3} {'type': 'loss', 'content': 0.08415324240922928, 'timestamp': '2025-09-30 22:38:26.311019', 'step': 21199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:26.346836', 'step': 21199, 'epoch': 3} {'type': 'loss', 'content': 0.07334184646606445, 'timestamp': '2025-09-30 22:38:26.372660', 'step': 21200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.408588', 'step': 21200, 'epoch': 3} {'type': 'loss', 'content': 0.07457070797681808, 'timestamp': '2025-09-30 22:38:26.413398', 'step': 21201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.448613', 'step': 21201, 'epoch': 3} {'type': 'loss', 'content': 0.00879052747040987, 'timestamp': '2025-09-30 22:38:26.453631', 'step': 21202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.487612', 'step': 21202, 'epoch': 3} {'type': 'loss', 'content': 0.13171982765197754, 'timestamp': '2025-09-30 22:38:26.492822', 'step': 21203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:26.524165', 'step': 21203, 'epoch': 3} {'type': 'loss', 'content': 0.06541479378938675, 'timestamp': '2025-09-30 22:38:26.557858', 'step': 21204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.588950', 'step': 21204, 'epoch': 3} {'type': 'loss', 'content': 0.11333557218313217, 'timestamp': '2025-09-30 22:38:26.594260', 'step': 21205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:26.625748', 'step': 21205, 'epoch': 3} {'type': 'loss', 'content': 0.07723507285118103, 'timestamp': '2025-09-30 22:38:26.631417', 'step': 21206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.666146', 'step': 21206, 'epoch': 3} {'type': 'loss', 'content': 0.05795833468437195, 'timestamp': '2025-09-30 22:38:26.672859', 'step': 21207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:26.705453', 'step': 21207, 'epoch': 3} {'type': 'loss', 'content': 0.05127868801355362, 'timestamp': '2025-09-30 22:38:26.742184', 'step': 21208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.777090', 'step': 21208, 'epoch': 3} {'type': 'loss', 'content': 0.03624469041824341, 'timestamp': '2025-09-30 22:38:26.782767', 'step': 21209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.818378', 'step': 21209, 'epoch': 3} {'type': 'loss', 'content': 0.058984607458114624, 'timestamp': '2025-09-30 22:38:26.832476', 'step': 21210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:26.876638', 'step': 21210, 'epoch': 3} {'type': 'loss', 'content': 0.07339762896299362, 'timestamp': '2025-09-30 22:38:26.884487', 'step': 21211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:26.921380', 'step': 21211, 'epoch': 3} {'type': 'loss', 'content': 0.16918039321899414, 'timestamp': '2025-09-30 22:38:26.947858', 'step': 21212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:26.991820', 'step': 21212, 'epoch': 3} {'type': 'loss', 'content': 0.04469192773103714, 'timestamp': '2025-09-30 22:38:26.997779', 'step': 21213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:27.033206', 'step': 21213, 'epoch': 3} {'type': 'loss', 'content': 0.11872664839029312, 'timestamp': '2025-09-30 22:38:27.037225', 'step': 21214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:27.070100', 'step': 21214, 'epoch': 3} {'type': 'loss', 'content': 0.18112331628799438, 'timestamp': '2025-09-30 22:38:27.075650', 'step': 21215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:27.125116', 'step': 21215, 'epoch': 3} {'type': 'loss', 'content': 0.051540523767471313, 'timestamp': '2025-09-30 22:38:27.152267', 'step': 21216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:27.188495', 'step': 21216, 'epoch': 3} {'type': 'loss', 'content': 0.039224009960889816, 'timestamp': '2025-09-30 22:38:27.192333', 'step': 21217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:27.237400', 'step': 21217, 'epoch': 3} {'type': 'loss', 'content': 0.07867870479822159, 'timestamp': '2025-09-30 22:38:27.241419', 'step': 21218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:27.274104', 'step': 21218, 'epoch': 3} {'type': 'loss', 'content': 0.07375012338161469, 'timestamp': '2025-09-30 22:38:27.279708', 'step': 21219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:27.316289', 'step': 21219, 'epoch': 3} {'type': 'loss', 'content': 0.0892881378531456, 'timestamp': '2025-09-30 22:38:27.343764', 'step': 21220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:27.378029', 'step': 21220, 'epoch': 3} {'type': 'loss', 'content': 0.07865612953901291, 'timestamp': '2025-09-30 22:38:27.382700', 'step': 21221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:27.415466', 'step': 21221, 'epoch': 3} {'type': 'loss', 'content': 0.10545413196086884, 'timestamp': '2025-09-30 22:38:27.422994', 'step': 21222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:27.457169', 'step': 21222, 'epoch': 3} {'type': 'loss', 'content': 0.022492652758955956, 'timestamp': '2025-09-30 22:38:27.461387', 'step': 21223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:27.495041', 'step': 21223, 'epoch': 3} {'type': 'loss', 'content': 0.02262391336262226, 'timestamp': '2025-09-30 22:38:27.522065', 'step': 21224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:27.556703', 'step': 21224, 'epoch': 3} {'type': 'loss', 'content': 0.06830241531133652, 'timestamp': '2025-09-30 22:38:27.561105', 'step': 21225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:27.594169', 'step': 21225, 'epoch': 3} {'type': 'loss', 'content': 0.0518118254840374, 'timestamp': '2025-09-30 22:38:27.598156', 'step': 21226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:27.631981', 'step': 21226, 'epoch': 3} {'type': 'loss', 'content': 0.16989243030548096, 'timestamp': '2025-09-30 22:38:27.636354', 'step': 21227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:38:27.670473', 'step': 21227, 'epoch': 3} {'type': 'loss', 'content': 0.035293493419885635, 'timestamp': '2025-09-30 22:38:27.698139', 'step': 21228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:27.736962', 'step': 21228, 'epoch': 3} {'type': 'loss', 'content': 0.06070016324520111, 'timestamp': '2025-09-30 22:38:27.742724', 'step': 21229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:27.775666', 'step': 21229, 'epoch': 3} {'type': 'loss', 'content': 0.06662490218877792, 'timestamp': '2025-09-30 22:38:27.780389', 'step': 21230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:27.812769', 'step': 21230, 'epoch': 3} {'type': 'loss', 'content': 0.07495546340942383, 'timestamp': '2025-09-30 22:38:27.824177', 'step': 21231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:27.859509', 'step': 21231, 'epoch': 3} {'type': 'loss', 'content': 0.01720809005200863, 'timestamp': '2025-09-30 22:38:27.884133', 'step': 21232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:27.917435', 'step': 21232, 'epoch': 3} {'type': 'loss', 'content': 0.09701015800237656, 'timestamp': '2025-09-30 22:38:27.931849', 'step': 21233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:27.964273', 'step': 21233, 'epoch': 3} {'type': 'loss', 'content': 0.03373311832547188, 'timestamp': '2025-09-30 22:38:27.970233', 'step': 21234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.003006', 'step': 21234, 'epoch': 3} {'type': 'loss', 'content': 0.13072246313095093, 'timestamp': '2025-09-30 22:38:28.007170', 'step': 21235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:28.041313', 'step': 21235, 'epoch': 3} {'type': 'loss', 'content': 0.1539238691329956, 'timestamp': '2025-09-30 22:38:28.076346', 'step': 21236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.108759', 'step': 21236, 'epoch': 3} {'type': 'loss', 'content': 0.10526201874017715, 'timestamp': '2025-09-30 22:38:28.124678', 'step': 21237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:28.160107', 'step': 21237, 'epoch': 3} {'type': 'loss', 'content': 0.09821689873933792, 'timestamp': '2025-09-30 22:38:28.164287', 'step': 21238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.205364', 'step': 21238, 'epoch': 3} {'type': 'loss', 'content': 0.16510622203350067, 'timestamp': '2025-09-30 22:38:28.211800', 'step': 21239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:28.258680', 'step': 21239, 'epoch': 3} {'type': 'loss', 'content': 0.02078898623585701, 'timestamp': '2025-09-30 22:38:28.285809', 'step': 21240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.330763', 'step': 21240, 'epoch': 3} {'type': 'loss', 'content': 0.07392933964729309, 'timestamp': '2025-09-30 22:38:28.345185', 'step': 21241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:28.377741', 'step': 21241, 'epoch': 3} {'type': 'loss', 'content': 0.12690217792987823, 'timestamp': '2025-09-30 22:38:28.382459', 'step': 21242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:28.439946', 'step': 21242, 'epoch': 3} {'type': 'loss', 'content': 0.07663573324680328, 'timestamp': '2025-09-30 22:38:28.455312', 'step': 21243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.490607', 'step': 21243, 'epoch': 3} {'type': 'loss', 'content': 0.046006567776203156, 'timestamp': '2025-09-30 22:38:28.516510', 'step': 21244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:28.558342', 'step': 21244, 'epoch': 3} {'type': 'loss', 'content': 0.046036653220653534, 'timestamp': '2025-09-30 22:38:28.564808', 'step': 21245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:28.597090', 'step': 21245, 'epoch': 3} {'type': 'loss', 'content': 0.04515828564763069, 'timestamp': '2025-09-30 22:38:28.607547', 'step': 21246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:28.641705', 'step': 21246, 'epoch': 3} {'type': 'loss', 'content': 0.04379334673285484, 'timestamp': '2025-09-30 22:38:28.647387', 'step': 21247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.683740', 'step': 21247, 'epoch': 3} {'type': 'loss', 'content': 0.06792145222425461, 'timestamp': '2025-09-30 22:38:28.708522', 'step': 21248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:28.744352', 'step': 21248, 'epoch': 3} {'type': 'loss', 'content': 0.10916589200496674, 'timestamp': '2025-09-30 22:38:28.749376', 'step': 21249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.784606', 'step': 21249, 'epoch': 3} {'type': 'loss', 'content': 0.06268253177404404, 'timestamp': '2025-09-30 22:38:28.790119', 'step': 21250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.823617', 'step': 21250, 'epoch': 3} {'type': 'loss', 'content': 0.08571582287549973, 'timestamp': '2025-09-30 22:38:28.827609', 'step': 21251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:28.859615', 'step': 21251, 'epoch': 3} {'type': 'loss', 'content': 0.04864076152443886, 'timestamp': '2025-09-30 22:38:28.887388', 'step': 21252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:28.920171', 'step': 21252, 'epoch': 3} {'type': 'loss', 'content': 0.019471025094389915, 'timestamp': '2025-09-30 22:38:28.929812', 'step': 21253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:28.965461', 'step': 21253, 'epoch': 3} {'type': 'loss', 'content': 0.062169838696718216, 'timestamp': '2025-09-30 22:38:28.982873', 'step': 21254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:29.017371', 'step': 21254, 'epoch': 3} {'type': 'loss', 'content': 0.05508112907409668, 'timestamp': '2025-09-30 22:38:29.023293', 'step': 21255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:29.056760', 'step': 21255, 'epoch': 3} {'type': 'loss', 'content': 0.08946368843317032, 'timestamp': '2025-09-30 22:38:29.081728', 'step': 21256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:29.114500', 'step': 21256, 'epoch': 3} {'type': 'loss', 'content': 0.06813493371009827, 'timestamp': '2025-09-30 22:38:29.119905', 'step': 21257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:29.153541', 'step': 21257, 'epoch': 3} {'type': 'loss', 'content': 0.11854946613311768, 'timestamp': '2025-09-30 22:38:29.159098', 'step': 21258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:29.191713', 'step': 21258, 'epoch': 3} {'type': 'loss', 'content': 0.05686052143573761, 'timestamp': '2025-09-30 22:38:29.196124', 'step': 21259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:29.229730', 'step': 21259, 'epoch': 3} {'type': 'loss', 'content': 0.0675559714436531, 'timestamp': '2025-09-30 22:38:29.266260', 'step': 21260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:29.297960', 'step': 21260, 'epoch': 3} {'type': 'loss', 'content': 0.07117211818695068, 'timestamp': '2025-09-30 22:38:29.302612', 'step': 21261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:29.335360', 'step': 21261, 'epoch': 3} {'type': 'loss', 'content': 0.09583315998315811, 'timestamp': '2025-09-30 22:38:29.346594', 'step': 21262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:29.391314', 'step': 21262, 'epoch': 3} {'type': 'loss', 'content': 0.14980927109718323, 'timestamp': '2025-09-30 22:38:29.396416', 'step': 21263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:29.436265', 'step': 21263, 'epoch': 3} {'type': 'loss', 'content': 0.05275851860642433, 'timestamp': '2025-09-30 22:38:29.476672', 'step': 21264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:29.508450', 'step': 21264, 'epoch': 3} {'type': 'loss', 'content': 0.09765452146530151, 'timestamp': '2025-09-30 22:38:29.513973', 'step': 21265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:29.546263', 'step': 21265, 'epoch': 3} {'type': 'loss', 'content': 0.04288865253329277, 'timestamp': '2025-09-30 22:38:29.563523', 'step': 21266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:29.608233', 'step': 21266, 'epoch': 3} {'type': 'loss', 'content': 0.09684848040342331, 'timestamp': '2025-09-30 22:38:29.613715', 'step': 21267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:29.646100', 'step': 21267, 'epoch': 3} {'type': 'loss', 'content': 0.057070717215538025, 'timestamp': '2025-09-30 22:38:29.675042', 'step': 21268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:29.719464', 'step': 21268, 'epoch': 3} {'type': 'loss', 'content': 0.048140618950128555, 'timestamp': '2025-09-30 22:38:29.723016', 'step': 21269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:29.759318', 'step': 21269, 'epoch': 3} {'type': 'loss', 'content': 0.10251965373754501, 'timestamp': '2025-09-30 22:38:29.763791', 'step': 21270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:29.794836', 'step': 21270, 'epoch': 3} {'type': 'loss', 'content': 0.031603358685970306, 'timestamp': '2025-09-30 22:38:29.798933', 'step': 21271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:29.844438', 'step': 21271, 'epoch': 3} {'type': 'loss', 'content': 0.04079155623912811, 'timestamp': '2025-09-30 22:38:29.871381', 'step': 21272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:29.916314', 'step': 21272, 'epoch': 3} {'type': 'loss', 'content': 0.07321052998304367, 'timestamp': '2025-09-30 22:38:29.920993', 'step': 21273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:29.954568', 'step': 21273, 'epoch': 3} {'type': 'loss', 'content': 0.06787807494401932, 'timestamp': '2025-09-30 22:38:29.959163', 'step': 21274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:29.995464', 'step': 21274, 'epoch': 3} {'type': 'loss', 'content': 0.07492800056934357, 'timestamp': '2025-09-30 22:38:30.000384', 'step': 21275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.032182', 'step': 21275, 'epoch': 3} {'type': 'loss', 'content': 0.13991765677928925, 'timestamp': '2025-09-30 22:38:30.058514', 'step': 21276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:30.090412', 'step': 21276, 'epoch': 3} {'type': 'loss', 'content': 0.05228620395064354, 'timestamp': '2025-09-30 22:38:30.094216', 'step': 21277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.127222', 'step': 21277, 'epoch': 3} {'type': 'loss', 'content': 0.13200005888938904, 'timestamp': '2025-09-30 22:38:30.143470', 'step': 21278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:30.175694', 'step': 21278, 'epoch': 3} {'type': 'loss', 'content': 0.07148950546979904, 'timestamp': '2025-09-30 22:38:30.179337', 'step': 21279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.213792', 'step': 21279, 'epoch': 3} {'type': 'loss', 'content': 0.10688982903957367, 'timestamp': '2025-09-30 22:38:30.240400', 'step': 21280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:30.272393', 'step': 21280, 'epoch': 3} {'type': 'loss', 'content': 0.04319950193166733, 'timestamp': '2025-09-30 22:38:30.291529', 'step': 21281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.337801', 'step': 21281, 'epoch': 3} {'type': 'loss', 'content': 0.07904279232025146, 'timestamp': '2025-09-30 22:38:30.342492', 'step': 21282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.378337', 'step': 21282, 'epoch': 3} {'type': 'loss', 'content': 0.05210018903017044, 'timestamp': '2025-09-30 22:38:30.385226', 'step': 21283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:30.422652', 'step': 21283, 'epoch': 3} {'type': 'loss', 'content': 0.13559739291667938, 'timestamp': '2025-09-30 22:38:30.450233', 'step': 21284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:30.489719', 'step': 21284, 'epoch': 3} {'type': 'loss', 'content': 0.08397427201271057, 'timestamp': '2025-09-30 22:38:30.493288', 'step': 21285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:30.530633', 'step': 21285, 'epoch': 3} {'type': 'loss', 'content': 0.03194008022546768, 'timestamp': '2025-09-30 22:38:30.548123', 'step': 21286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.593902', 'step': 21286, 'epoch': 3} {'type': 'loss', 'content': 0.09279455989599228, 'timestamp': '2025-09-30 22:38:30.611048', 'step': 21287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:30.656721', 'step': 21287, 'epoch': 3} {'type': 'loss', 'content': 0.07058484107255936, 'timestamp': '2025-09-30 22:38:30.681458', 'step': 21288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:30.713324', 'step': 21288, 'epoch': 3} {'type': 'loss', 'content': 0.06168973445892334, 'timestamp': '2025-09-30 22:38:30.717029', 'step': 21289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:30.749598', 'step': 21289, 'epoch': 3} {'type': 'loss', 'content': 0.12590807676315308, 'timestamp': '2025-09-30 22:38:30.753331', 'step': 21290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:30.796545', 'step': 21290, 'epoch': 3} {'type': 'loss', 'content': 0.06170964241027832, 'timestamp': '2025-09-30 22:38:30.812120', 'step': 21291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:30.858059', 'step': 21291, 'epoch': 3} {'type': 'loss', 'content': 0.06892445683479309, 'timestamp': '2025-09-30 22:38:30.896266', 'step': 21292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:30.929798', 'step': 21292, 'epoch': 3} {'type': 'loss', 'content': 0.055260952562093735, 'timestamp': '2025-09-30 22:38:30.946135', 'step': 21293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:30.989524', 'step': 21293, 'epoch': 3} {'type': 'loss', 'content': 0.04263215512037277, 'timestamp': '2025-09-30 22:38:31.005835', 'step': 21294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.039726', 'step': 21294, 'epoch': 3} {'type': 'loss', 'content': 0.02749374322593212, 'timestamp': '2025-09-30 22:38:31.055716', 'step': 21295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.100341', 'step': 21295, 'epoch': 3} {'type': 'loss', 'content': 0.09260217845439911, 'timestamp': '2025-09-30 22:38:31.126814', 'step': 21296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:31.159576', 'step': 21296, 'epoch': 3} {'type': 'loss', 'content': 0.055617284029722214, 'timestamp': '2025-09-30 22:38:31.164147', 'step': 21297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:31.196322', 'step': 21297, 'epoch': 3} {'type': 'loss', 'content': 0.111800417304039, 'timestamp': '2025-09-30 22:38:31.200393', 'step': 21298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:31.245624', 'step': 21298, 'epoch': 3} {'type': 'loss', 'content': 0.12261075526475906, 'timestamp': '2025-09-30 22:38:31.249943', 'step': 21299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.281380', 'step': 21299, 'epoch': 3} {'type': 'loss', 'content': 0.06788432598114014, 'timestamp': '2025-09-30 22:38:31.320443', 'step': 21300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:31.366049', 'step': 21300, 'epoch': 3} {'type': 'loss', 'content': 0.05786850303411484, 'timestamp': '2025-09-30 22:38:31.369912', 'step': 21301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:31.415821', 'step': 21301, 'epoch': 3} {'type': 'loss', 'content': 0.04679011180996895, 'timestamp': '2025-09-30 22:38:31.419529', 'step': 21302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.466079', 'step': 21302, 'epoch': 3} {'type': 'loss', 'content': 0.041155632585287094, 'timestamp': '2025-09-30 22:38:31.480229', 'step': 21303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:31.511678', 'step': 21303, 'epoch': 3} {'type': 'loss', 'content': 0.06532688438892365, 'timestamp': '2025-09-30 22:38:31.546089', 'step': 21304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:31.579359', 'step': 21304, 'epoch': 3} {'type': 'loss', 'content': 0.04993651062250137, 'timestamp': '2025-09-30 22:38:31.583336', 'step': 21305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:31.617950', 'step': 21305, 'epoch': 3} {'type': 'loss', 'content': 0.12509071826934814, 'timestamp': '2025-09-30 22:38:31.623754', 'step': 21306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:31.656308', 'step': 21306, 'epoch': 3} {'type': 'loss', 'content': 0.11793986707925797, 'timestamp': '2025-09-30 22:38:31.661373', 'step': 21307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:31.693434', 'step': 21307, 'epoch': 3} {'type': 'loss', 'content': 0.13805101811885834, 'timestamp': '2025-09-30 22:38:31.719102', 'step': 21308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.750641', 'step': 21308, 'epoch': 3} {'type': 'loss', 'content': 0.06926703453063965, 'timestamp': '2025-09-30 22:38:31.755643', 'step': 21309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:31.787603', 'step': 21309, 'epoch': 3} {'type': 'loss', 'content': 0.0560409314930439, 'timestamp': '2025-09-30 22:38:31.793792', 'step': 21310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.833525', 'step': 21310, 'epoch': 3} {'type': 'loss', 'content': 0.062174372375011444, 'timestamp': '2025-09-30 22:38:31.850533', 'step': 21311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:31.897403', 'step': 21311, 'epoch': 3} {'type': 'loss', 'content': 0.08331518620252609, 'timestamp': '2025-09-30 22:38:31.937372', 'step': 21312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:31.979035', 'step': 21312, 'epoch': 3} {'type': 'loss', 'content': 0.06473991274833679, 'timestamp': '2025-09-30 22:38:31.983281', 'step': 21313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:32.027437', 'step': 21313, 'epoch': 3} {'type': 'loss', 'content': 0.1518237441778183, 'timestamp': '2025-09-30 22:38:32.033172', 'step': 21314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:32.078033', 'step': 21314, 'epoch': 3} {'type': 'loss', 'content': 0.027074819430708885, 'timestamp': '2025-09-30 22:38:32.081704', 'step': 21315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:32.116399', 'step': 21315, 'epoch': 3} {'type': 'loss', 'content': 0.08368667960166931, 'timestamp': '2025-09-30 22:38:32.141688', 'step': 21316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:32.186977', 'step': 21316, 'epoch': 3} {'type': 'loss', 'content': 0.0328654982149601, 'timestamp': '2025-09-30 22:38:32.192012', 'step': 21317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.227100', 'step': 21317, 'epoch': 3} {'type': 'loss', 'content': 0.10376691073179245, 'timestamp': '2025-09-30 22:38:32.231919', 'step': 21318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:32.265892', 'step': 21318, 'epoch': 3} {'type': 'loss', 'content': 0.07565515488386154, 'timestamp': '2025-09-30 22:38:32.269586', 'step': 21319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.302507', 'step': 21319, 'epoch': 3} {'type': 'loss', 'content': 0.07053340971469879, 'timestamp': '2025-09-30 22:38:32.326969', 'step': 21320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.360379', 'step': 21320, 'epoch': 3} {'type': 'loss', 'content': 0.04493234306573868, 'timestamp': '2025-09-30 22:38:32.364534', 'step': 21321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.403116', 'step': 21321, 'epoch': 3} {'type': 'loss', 'content': 0.13662511110305786, 'timestamp': '2025-09-30 22:38:32.416385', 'step': 21322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.449117', 'step': 21322, 'epoch': 3} {'type': 'loss', 'content': 0.060548022389411926, 'timestamp': '2025-09-30 22:38:32.455780', 'step': 21323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:32.489039', 'step': 21323, 'epoch': 3} {'type': 'loss', 'content': 0.11011437326669693, 'timestamp': '2025-09-30 22:38:32.514029', 'step': 21324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:32.549582', 'step': 21324, 'epoch': 3} {'type': 'loss', 'content': 0.12522204220294952, 'timestamp': '2025-09-30 22:38:32.565385', 'step': 21325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.599082', 'step': 21325, 'epoch': 3} {'type': 'loss', 'content': 0.04877886176109314, 'timestamp': '2025-09-30 22:38:32.603606', 'step': 21326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:32.638218', 'step': 21326, 'epoch': 3} {'type': 'loss', 'content': 0.11138129979372025, 'timestamp': '2025-09-30 22:38:32.643513', 'step': 21327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:32.677652', 'step': 21327, 'epoch': 3} {'type': 'loss', 'content': 0.041981011629104614, 'timestamp': '2025-09-30 22:38:32.717321', 'step': 21328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.750106', 'step': 21328, 'epoch': 3} {'type': 'loss', 'content': 0.05663706734776497, 'timestamp': '2025-09-30 22:38:32.755071', 'step': 21329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.786451', 'step': 21329, 'epoch': 3} {'type': 'loss', 'content': 0.07178021967411041, 'timestamp': '2025-09-30 22:38:32.790353', 'step': 21330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:32.822039', 'step': 21330, 'epoch': 3} {'type': 'loss', 'content': 0.03169429674744606, 'timestamp': '2025-09-30 22:38:32.828419', 'step': 21331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:32.875007', 'step': 21331, 'epoch': 3} {'type': 'loss', 'content': 0.04329993948340416, 'timestamp': '2025-09-30 22:38:32.901742', 'step': 21332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:32.937333', 'step': 21332, 'epoch': 3} {'type': 'loss', 'content': 0.042208150029182434, 'timestamp': '2025-09-30 22:38:32.942963', 'step': 21333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:32.977522', 'step': 21333, 'epoch': 3} {'type': 'loss', 'content': 0.10425194352865219, 'timestamp': '2025-09-30 22:38:32.980798', 'step': 21334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:33.013595', 'step': 21334, 'epoch': 3} {'type': 'loss', 'content': 0.06679809093475342, 'timestamp': '2025-09-30 22:38:33.017612', 'step': 21335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.058283', 'step': 21335, 'epoch': 3} {'type': 'loss', 'content': 0.043593842536211014, 'timestamp': '2025-09-30 22:38:33.085503', 'step': 21336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:33.128105', 'step': 21336, 'epoch': 3} {'type': 'loss', 'content': 0.02800952084362507, 'timestamp': '2025-09-30 22:38:33.134483', 'step': 21337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.168249', 'step': 21337, 'epoch': 3} {'type': 'loss', 'content': 0.01711302250623703, 'timestamp': '2025-09-30 22:38:33.173841', 'step': 21338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.205712', 'step': 21338, 'epoch': 3} {'type': 'loss', 'content': 0.0910755842924118, 'timestamp': '2025-09-30 22:38:33.210747', 'step': 21339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.244376', 'step': 21339, 'epoch': 3} {'type': 'loss', 'content': 0.062193356454372406, 'timestamp': '2025-09-30 22:38:33.271535', 'step': 21340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.316056', 'step': 21340, 'epoch': 3} {'type': 'loss', 'content': 0.04336412623524666, 'timestamp': '2025-09-30 22:38:33.320607', 'step': 21341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.363847', 'step': 21341, 'epoch': 3} {'type': 'loss', 'content': 0.11047914624214172, 'timestamp': '2025-09-30 22:38:33.369685', 'step': 21342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:33.401361', 'step': 21342, 'epoch': 3} {'type': 'loss', 'content': 0.07308969646692276, 'timestamp': '2025-09-30 22:38:33.405835', 'step': 21343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:33.448098', 'step': 21343, 'epoch': 3} {'type': 'loss', 'content': 0.058653514832258224, 'timestamp': '2025-09-30 22:38:33.474345', 'step': 21344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:33.514545', 'step': 21344, 'epoch': 3} {'type': 'loss', 'content': 0.036840829998254776, 'timestamp': '2025-09-30 22:38:33.523342', 'step': 21345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:33.568785', 'step': 21345, 'epoch': 3} {'type': 'loss', 'content': 0.050850242376327515, 'timestamp': '2025-09-30 22:38:33.577397', 'step': 21346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:33.612607', 'step': 21346, 'epoch': 3} {'type': 'loss', 'content': 0.06195909157395363, 'timestamp': '2025-09-30 22:38:33.618799', 'step': 21347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:33.664430', 'step': 21347, 'epoch': 3} {'type': 'loss', 'content': 0.042079806327819824, 'timestamp': '2025-09-30 22:38:33.693404', 'step': 21348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.726200', 'step': 21348, 'epoch': 3} {'type': 'loss', 'content': 0.039986591786146164, 'timestamp': '2025-09-30 22:38:33.733107', 'step': 21349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:33.774776', 'step': 21349, 'epoch': 3} {'type': 'loss', 'content': 0.10088274627923965, 'timestamp': '2025-09-30 22:38:33.781597', 'step': 21350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:33.825406', 'step': 21350, 'epoch': 3} {'type': 'loss', 'content': 0.0563405342400074, 'timestamp': '2025-09-30 22:38:33.829830', 'step': 21351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:33.863469', 'step': 21351, 'epoch': 3} {'type': 'loss', 'content': 0.03387221321463585, 'timestamp': '2025-09-30 22:38:33.901724', 'step': 21352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:33.935023', 'step': 21352, 'epoch': 3} {'type': 'loss', 'content': 0.05906527861952782, 'timestamp': '2025-09-30 22:38:33.938298', 'step': 21353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:33.974861', 'step': 21353, 'epoch': 3} {'type': 'loss', 'content': 0.0357094332575798, 'timestamp': '2025-09-30 22:38:33.979963', 'step': 21354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.011884', 'step': 21354, 'epoch': 3} {'type': 'loss', 'content': 0.097356416285038, 'timestamp': '2025-09-30 22:38:34.018240', 'step': 21355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.062752', 'step': 21355, 'epoch': 3} {'type': 'loss', 'content': 0.0928288921713829, 'timestamp': '2025-09-30 22:38:34.090740', 'step': 21356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.130920', 'step': 21356, 'epoch': 3} {'type': 'loss', 'content': 0.06339012086391449, 'timestamp': '2025-09-30 22:38:34.134870', 'step': 21357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.166338', 'step': 21357, 'epoch': 3} {'type': 'loss', 'content': 0.034197524189949036, 'timestamp': '2025-09-30 22:38:34.169349', 'step': 21358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.200954', 'step': 21358, 'epoch': 3} {'type': 'loss', 'content': 0.059421174228191376, 'timestamp': '2025-09-30 22:38:34.216037', 'step': 21359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.259372', 'step': 21359, 'epoch': 3} {'type': 'loss', 'content': 0.032517578452825546, 'timestamp': '2025-09-30 22:38:34.286554', 'step': 21360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.322477', 'step': 21360, 'epoch': 3} {'type': 'loss', 'content': 0.05406380444765091, 'timestamp': '2025-09-30 22:38:34.329121', 'step': 21361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.362501', 'step': 21361, 'epoch': 3} {'type': 'loss', 'content': 0.08605224639177322, 'timestamp': '2025-09-30 22:38:34.367388', 'step': 21362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.399144', 'step': 21362, 'epoch': 3} {'type': 'loss', 'content': 0.01979617401957512, 'timestamp': '2025-09-30 22:38:34.404316', 'step': 21363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.436694', 'step': 21363, 'epoch': 3} {'type': 'loss', 'content': 0.08917911350727081, 'timestamp': '2025-09-30 22:38:34.462122', 'step': 21364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:34.495376', 'step': 21364, 'epoch': 3} {'type': 'loss', 'content': 0.12259382009506226, 'timestamp': '2025-09-30 22:38:34.499749', 'step': 21365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.533590', 'step': 21365, 'epoch': 3} {'type': 'loss', 'content': 0.10882952064275742, 'timestamp': '2025-09-30 22:38:34.540223', 'step': 21366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:34.573736', 'step': 21366, 'epoch': 3} {'type': 'loss', 'content': 0.12778986990451813, 'timestamp': '2025-09-30 22:38:34.578124', 'step': 21367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.612236', 'step': 21367, 'epoch': 3} {'type': 'loss', 'content': 0.08816997706890106, 'timestamp': '2025-09-30 22:38:34.649135', 'step': 21368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.680603', 'step': 21368, 'epoch': 3} {'type': 'loss', 'content': 0.043462663888931274, 'timestamp': '2025-09-30 22:38:34.687181', 'step': 21369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:34.720882', 'step': 21369, 'epoch': 3} {'type': 'loss', 'content': 0.020859362557530403, 'timestamp': '2025-09-30 22:38:34.725862', 'step': 21370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.757856', 'step': 21370, 'epoch': 3} {'type': 'loss', 'content': 0.0321044884622097, 'timestamp': '2025-09-30 22:38:34.774756', 'step': 21371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:34.816918', 'step': 21371, 'epoch': 3} {'type': 'loss', 'content': 0.04098973795771599, 'timestamp': '2025-09-30 22:38:34.844413', 'step': 21372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.886117', 'step': 21372, 'epoch': 3} {'type': 'loss', 'content': 0.09367968887090683, 'timestamp': '2025-09-30 22:38:34.889628', 'step': 21373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:34.921893', 'step': 21373, 'epoch': 3} {'type': 'loss', 'content': 0.08570137619972229, 'timestamp': '2025-09-30 22:38:34.925533', 'step': 21374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:34.957717', 'step': 21374, 'epoch': 3} {'type': 'loss', 'content': 0.09536857157945633, 'timestamp': '2025-09-30 22:38:34.972687', 'step': 21375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:35.004867', 'step': 21375, 'epoch': 3} {'type': 'loss', 'content': 0.07844575494527817, 'timestamp': '2025-09-30 22:38:35.030856', 'step': 21376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:35.074040', 'step': 21376, 'epoch': 3} {'type': 'loss', 'content': 0.05844671651721001, 'timestamp': '2025-09-30 22:38:35.077463', 'step': 21377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:35.111825', 'step': 21377, 'epoch': 3} {'type': 'loss', 'content': 0.050327252596616745, 'timestamp': '2025-09-30 22:38:35.125864', 'step': 21378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.157746', 'step': 21378, 'epoch': 3} {'type': 'loss', 'content': 0.10365309566259384, 'timestamp': '2025-09-30 22:38:35.162726', 'step': 21379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:35.196802', 'step': 21379, 'epoch': 3} {'type': 'loss', 'content': 0.04422586038708687, 'timestamp': '2025-09-30 22:38:35.224452', 'step': 21380, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.256813', 'step': 21380, 'epoch': 3} {'type': 'loss', 'content': 0.04473637044429779, 'timestamp': '2025-09-30 22:38:35.270153', 'step': 21381, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:35.312914', 'step': 21381, 'epoch': 3} {'type': 'loss', 'content': 0.02793269231915474, 'timestamp': '2025-09-30 22:38:35.326237', 'step': 21382, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:35.358194', 'step': 21382, 'epoch': 3} {'type': 'loss', 'content': 0.08863480389118195, 'timestamp': '2025-09-30 22:38:35.371139', 'step': 21383, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:35.404614', 'step': 21383, 'epoch': 3} {'type': 'loss', 'content': 0.08340062201023102, 'timestamp': '2025-09-30 22:38:35.431323', 'step': 21384, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.468156', 'step': 21384, 'epoch': 3} {'type': 'loss', 'content': 0.09989721328020096, 'timestamp': '2025-09-30 22:38:35.473855', 'step': 21385, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.507522', 'step': 21385, 'epoch': 3} {'type': 'loss', 'content': 0.012684888206422329, 'timestamp': '2025-09-30 22:38:35.513112', 'step': 21386, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:35.543094', 'step': 21386, 'epoch': 3} {'type': 'loss', 'content': 0.027819959446787834, 'timestamp': '2025-09-30 22:38:35.558289', 'step': 21387, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.600039', 'step': 21387, 'epoch': 3} {'type': 'loss', 'content': 0.059238001704216, 'timestamp': '2025-09-30 22:38:35.628880', 'step': 21388, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.661287', 'step': 21388, 'epoch': 3} {'type': 'loss', 'content': 0.05463540181517601, 'timestamp': '2025-09-30 22:38:35.664230', 'step': 21389, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.694503', 'step': 21389, 'epoch': 3} {'type': 'loss', 'content': 0.0160541795194149, 'timestamp': '2025-09-30 22:38:35.707605', 'step': 21390, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:35.738396', 'step': 21390, 'epoch': 3} {'type': 'loss', 'content': 0.07040291279554367, 'timestamp': '2025-09-30 22:38:35.743511', 'step': 21391, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.777936', 'step': 21391, 'epoch': 3} {'type': 'loss', 'content': 0.05874110758304596, 'timestamp': '2025-09-30 22:38:35.803039', 'step': 21392, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:35.844826', 'step': 21392, 'epoch': 3} {'type': 'loss', 'content': 0.07418320327997208, 'timestamp': '2025-09-30 22:38:35.847780', 'step': 21393, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:35.880804', 'step': 21393, 'epoch': 3} {'type': 'loss', 'content': 0.05828865244984627, 'timestamp': '2025-09-30 22:38:35.883925', 'step': 21394, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:35.916364', 'step': 21394, 'epoch': 3} {'type': 'loss', 'content': 0.05381257086992264, 'timestamp': '2025-09-30 22:38:35.920054', 'step': 21395, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:35.962955', 'step': 21395, 'epoch': 3} {'type': 'loss', 'content': 0.032818518579006195, 'timestamp': '2025-09-30 22:38:35.988641', 'step': 21396, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.021594', 'step': 21396, 'epoch': 3} {'type': 'loss', 'content': 0.07531498372554779, 'timestamp': '2025-09-30 22:38:36.025427', 'step': 21397, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.064539', 'step': 21397, 'epoch': 3} {'type': 'loss', 'content': 0.044114988297224045, 'timestamp': '2025-09-30 22:38:36.069662', 'step': 21398, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.112764', 'step': 21398, 'epoch': 3} {'type': 'loss', 'content': 0.06722118705511093, 'timestamp': '2025-09-30 22:38:36.119342', 'step': 21399, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.152247', 'step': 21399, 'epoch': 3} {'type': 'loss', 'content': 0.09322840720415115, 'timestamp': '2025-09-30 22:38:36.186450', 'step': 21400, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.217131', 'step': 21400, 'epoch': 3} {'type': 'loss', 'content': 0.08430995047092438, 'timestamp': '2025-09-30 22:38:36.223057', 'step': 21401, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:36.256295', 'step': 21401, 'epoch': 3} {'type': 'loss', 'content': 0.06929396837949753, 'timestamp': '2025-09-30 22:38:36.262676', 'step': 21402, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:36.298334', 'step': 21402, 'epoch': 3} {'type': 'loss', 'content': 0.07543150335550308, 'timestamp': '2025-09-30 22:38:36.301236', 'step': 21403, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.333635', 'step': 21403, 'epoch': 3} {'type': 'loss', 'content': 0.0686054602265358, 'timestamp': '2025-09-30 22:38:36.362020', 'step': 21404, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:36.396381', 'step': 21404, 'epoch': 3} {'type': 'loss', 'content': 0.06280865520238876, 'timestamp': '2025-09-30 22:38:36.399803', 'step': 21405, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:36.433040', 'step': 21405, 'epoch': 3} {'type': 'loss', 'content': 0.07761608064174652, 'timestamp': '2025-09-30 22:38:36.437410', 'step': 21406, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.471353', 'step': 21406, 'epoch': 3} {'type': 'loss', 'content': 0.08551828563213348, 'timestamp': '2025-09-30 22:38:36.487314', 'step': 21407, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:36.520814', 'step': 21407, 'epoch': 3} {'type': 'loss', 'content': 0.053029268980026245, 'timestamp': '2025-09-30 22:38:36.546273', 'step': 21408, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:36.579107', 'step': 21408, 'epoch': 3} {'type': 'loss', 'content': 0.0881393626332283, 'timestamp': '2025-09-30 22:38:36.583390', 'step': 21409, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:36.617660', 'step': 21409, 'epoch': 3} {'type': 'loss', 'content': 0.06470275670289993, 'timestamp': '2025-09-30 22:38:36.620941', 'step': 21410, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:36.665614', 'step': 21410, 'epoch': 3} {'type': 'loss', 'content': 0.05558587238192558, 'timestamp': '2025-09-30 22:38:36.670544', 'step': 21411, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:36.703901', 'step': 21411, 'epoch': 3} {'type': 'loss', 'content': 0.07903186231851578, 'timestamp': '2025-09-30 22:38:36.730196', 'step': 21412, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.764628', 'step': 21412, 'epoch': 3} {'type': 'loss', 'content': 0.12961429357528687, 'timestamp': '2025-09-30 22:38:36.781016', 'step': 21413, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:36.813542', 'step': 21413, 'epoch': 3} {'type': 'loss', 'content': 0.035658616572618484, 'timestamp': '2025-09-30 22:38:36.821931', 'step': 21414, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:36.853724', 'step': 21414, 'epoch': 3} {'type': 'loss', 'content': 0.03305413946509361, 'timestamp': '2025-09-30 22:38:36.857645', 'step': 21415, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:36.888783', 'step': 21415, 'epoch': 3} {'type': 'loss', 'content': 0.04955011606216431, 'timestamp': '2025-09-30 22:38:36.915455', 'step': 21416, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:36.948482', 'step': 21416, 'epoch': 3} {'type': 'loss', 'content': 0.029069168493151665, 'timestamp': '2025-09-30 22:38:36.953448', 'step': 21417, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:36.986050', 'step': 21417, 'epoch': 3} {'type': 'loss', 'content': 0.0941029042005539, 'timestamp': '2025-09-30 22:38:36.990528', 'step': 21418, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.024827', 'step': 21418, 'epoch': 3} {'type': 'loss', 'content': 0.07670001685619354, 'timestamp': '2025-09-30 22:38:37.028344', 'step': 21419, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:37.069542', 'step': 21419, 'epoch': 3} {'type': 'loss', 'content': 0.08360061049461365, 'timestamp': '2025-09-30 22:38:37.094917', 'step': 21420, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:37.132278', 'step': 21420, 'epoch': 3} {'type': 'loss', 'content': 0.05444377660751343, 'timestamp': '2025-09-30 22:38:37.136266', 'step': 21421, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:37.169649', 'step': 21421, 'epoch': 3} {'type': 'loss', 'content': 0.09882267564535141, 'timestamp': '2025-09-30 22:38:37.175048', 'step': 21422, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.208950', 'step': 21422, 'epoch': 3} {'type': 'loss', 'content': 0.09599314630031586, 'timestamp': '2025-09-30 22:38:37.213121', 'step': 21423, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:37.247048', 'step': 21423, 'epoch': 3} {'type': 'loss', 'content': 0.08110105991363525, 'timestamp': '2025-09-30 22:38:37.273512', 'step': 21424, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.305156', 'step': 21424, 'epoch': 3} {'type': 'loss', 'content': 0.021833453327417374, 'timestamp': '2025-09-30 22:38:37.309900', 'step': 21425, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.343583', 'step': 21425, 'epoch': 3} {'type': 'loss', 'content': 0.042286474257707596, 'timestamp': '2025-09-30 22:38:37.347664', 'step': 21426, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:37.379983', 'step': 21426, 'epoch': 3} {'type': 'loss', 'content': 0.08952895551919937, 'timestamp': '2025-09-30 22:38:37.383739', 'step': 21427, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:37.418505', 'step': 21427, 'epoch': 3} {'type': 'loss', 'content': 0.07010706514120102, 'timestamp': '2025-09-30 22:38:37.446784', 'step': 21428, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:37.481444', 'step': 21428, 'epoch': 3} {'type': 'loss', 'content': 0.07749984413385391, 'timestamp': '2025-09-30 22:38:37.485246', 'step': 21429, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.522730', 'step': 21429, 'epoch': 3} {'type': 'loss', 'content': 0.05315264314413071, 'timestamp': '2025-09-30 22:38:37.530708', 'step': 21430, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.575942', 'step': 21430, 'epoch': 3} {'type': 'loss', 'content': 0.08087285608053207, 'timestamp': '2025-09-30 22:38:37.579685', 'step': 21431, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.611718', 'step': 21431, 'epoch': 3} {'type': 'loss', 'content': 0.08431106060743332, 'timestamp': '2025-09-30 22:38:37.637523', 'step': 21432, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:37.678590', 'step': 21432, 'epoch': 3} {'type': 'loss', 'content': 0.0438154973089695, 'timestamp': '2025-09-30 22:38:37.683599', 'step': 21433, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.716916', 'step': 21433, 'epoch': 3} {'type': 'loss', 'content': 0.07697410136461258, 'timestamp': '2025-09-30 22:38:37.728029', 'step': 21434, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:37.761423', 'step': 21434, 'epoch': 3} {'type': 'loss', 'content': 0.08630979061126709, 'timestamp': '2025-09-30 22:38:37.767691', 'step': 21435, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:37.803169', 'step': 21435, 'epoch': 3} {'type': 'loss', 'content': 0.0991683304309845, 'timestamp': '2025-09-30 22:38:37.845059', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:38:46.032595', 'step': 21436, 'epoch': 3} {'type': 'pplx', 'content': 8013.841407411097, 'timestamp': '2025-09-30 22:38:46.039611', 'step': 21436, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:46.081058', 'step': 21436, 'epoch': 3} {'type': 'loss', 'content': 0.03782728314399719, 'timestamp': '2025-09-30 22:38:46.086292', 'step': 21437, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.128834', 'step': 21437, 'epoch': 3} {'type': 'loss', 'content': 0.07720545679330826, 'timestamp': '2025-09-30 22:38:46.132687', 'step': 21438, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:46.177406', 'step': 21438, 'epoch': 3} {'type': 'loss', 'content': 0.11022306978702545, 'timestamp': '2025-09-30 22:38:46.190272', 'step': 21439, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.225153', 'step': 21439, 'epoch': 3} {'type': 'loss', 'content': 0.03854327276349068, 'timestamp': '2025-09-30 22:38:46.253570', 'step': 21440, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.285759', 'step': 21440, 'epoch': 3} {'type': 'loss', 'content': 0.08236119896173477, 'timestamp': '2025-09-30 22:38:46.301797', 'step': 21441, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:46.334415', 'step': 21441, 'epoch': 3} {'type': 'loss', 'content': 0.062206268310546875, 'timestamp': '2025-09-30 22:38:46.338342', 'step': 21442, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:46.370364', 'step': 21442, 'epoch': 3} {'type': 'loss', 'content': 0.034543439745903015, 'timestamp': '2025-09-30 22:38:46.376805', 'step': 21443, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.410154', 'step': 21443, 'epoch': 3} {'type': 'loss', 'content': 0.045450616627931595, 'timestamp': '2025-09-30 22:38:46.448933', 'step': 21444, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.482166', 'step': 21444, 'epoch': 3} {'type': 'loss', 'content': 0.06838591396808624, 'timestamp': '2025-09-30 22:38:46.487142', 'step': 21445, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.519917', 'step': 21445, 'epoch': 3} {'type': 'loss', 'content': 0.04905309900641441, 'timestamp': '2025-09-30 22:38:46.523554', 'step': 21446, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:46.565431', 'step': 21446, 'epoch': 3} {'type': 'loss', 'content': 0.1314675360918045, 'timestamp': '2025-09-30 22:38:46.569160', 'step': 21447, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.602416', 'step': 21447, 'epoch': 3} {'type': 'loss', 'content': 0.04046997055411339, 'timestamp': '2025-09-30 22:38:46.641009', 'step': 21448, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:46.684788', 'step': 21448, 'epoch': 3} {'type': 'loss', 'content': 0.04656192287802696, 'timestamp': '2025-09-30 22:38:46.688791', 'step': 21449, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.722888', 'step': 21449, 'epoch': 3} {'type': 'loss', 'content': 0.12351587414741516, 'timestamp': '2025-09-30 22:38:46.728170', 'step': 21450, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:46.770563', 'step': 21450, 'epoch': 3} {'type': 'loss', 'content': 0.08852402865886688, 'timestamp': '2025-09-30 22:38:46.775365', 'step': 21451, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:46.808550', 'step': 21451, 'epoch': 3} {'type': 'loss', 'content': 0.07677854597568512, 'timestamp': '2025-09-30 22:38:46.834529', 'step': 21452, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:46.879002', 'step': 21452, 'epoch': 3} {'type': 'loss', 'content': 0.038416050374507904, 'timestamp': '2025-09-30 22:38:46.897927', 'step': 21453, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:46.943839', 'step': 21453, 'epoch': 3} {'type': 'loss', 'content': 0.07964791357517242, 'timestamp': '2025-09-30 22:38:46.949650', 'step': 21454, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:46.982646', 'step': 21454, 'epoch': 3} {'type': 'loss', 'content': 0.004009165335446596, 'timestamp': '2025-09-30 22:38:46.988637', 'step': 21455, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.023561', 'step': 21455, 'epoch': 3} {'type': 'loss', 'content': 0.0352153405547142, 'timestamp': '2025-09-30 22:38:47.049984', 'step': 21456, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:47.082611', 'step': 21456, 'epoch': 3} {'type': 'loss', 'content': 0.015063845552504063, 'timestamp': '2025-09-30 22:38:47.087812', 'step': 21457, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.121093', 'step': 21457, 'epoch': 3} {'type': 'loss', 'content': 0.05226464569568634, 'timestamp': '2025-09-30 22:38:47.125400', 'step': 21458, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.159569', 'step': 21458, 'epoch': 3} {'type': 'loss', 'content': 0.05953875184059143, 'timestamp': '2025-09-30 22:38:47.163407', 'step': 21459, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.210012', 'step': 21459, 'epoch': 3} {'type': 'loss', 'content': 0.04896942153573036, 'timestamp': '2025-09-30 22:38:47.235223', 'step': 21460, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.267694', 'step': 21460, 'epoch': 3} {'type': 'loss', 'content': 0.03721444308757782, 'timestamp': '2025-09-30 22:38:47.272463', 'step': 21461, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:47.305805', 'step': 21461, 'epoch': 3} {'type': 'loss', 'content': 0.040583740919828415, 'timestamp': '2025-09-30 22:38:47.309814', 'step': 21462, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:47.342275', 'step': 21462, 'epoch': 3} {'type': 'loss', 'content': 0.054959844797849655, 'timestamp': '2025-09-30 22:38:47.346744', 'step': 21463, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:47.378277', 'step': 21463, 'epoch': 3} {'type': 'loss', 'content': 0.03307593986392021, 'timestamp': '2025-09-30 22:38:47.403940', 'step': 21464, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:47.446448', 'step': 21464, 'epoch': 3} {'type': 'loss', 'content': 0.016856014728546143, 'timestamp': '2025-09-30 22:38:47.451754', 'step': 21465, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:47.484222', 'step': 21465, 'epoch': 3} {'type': 'loss', 'content': 0.04672451689839363, 'timestamp': '2025-09-30 22:38:47.490465', 'step': 21466, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:47.525432', 'step': 21466, 'epoch': 3} {'type': 'loss', 'content': 0.016228875145316124, 'timestamp': '2025-09-30 22:38:47.529264', 'step': 21467, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.563400', 'step': 21467, 'epoch': 3} {'type': 'loss', 'content': 0.05140890181064606, 'timestamp': '2025-09-30 22:38:47.588834', 'step': 21468, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.622785', 'step': 21468, 'epoch': 3} {'type': 'loss', 'content': 0.03854605183005333, 'timestamp': '2025-09-30 22:38:47.629107', 'step': 21469, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:47.662782', 'step': 21469, 'epoch': 3} {'type': 'loss', 'content': 0.08215751498937607, 'timestamp': '2025-09-30 22:38:47.666098', 'step': 21470, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:47.707031', 'step': 21470, 'epoch': 3} {'type': 'loss', 'content': 0.10706396400928497, 'timestamp': '2025-09-30 22:38:47.712305', 'step': 21471, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:47.744685', 'step': 21471, 'epoch': 3} {'type': 'loss', 'content': 0.04708120599389076, 'timestamp': '2025-09-30 22:38:47.770221', 'step': 21472, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:47.802144', 'step': 21472, 'epoch': 3} {'type': 'loss', 'content': 0.05414756387472153, 'timestamp': '2025-09-30 22:38:47.816635', 'step': 21473, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:47.848285', 'step': 21473, 'epoch': 3} {'type': 'loss', 'content': 0.02547033131122589, 'timestamp': '2025-09-30 22:38:47.861416', 'step': 21474, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:47.902796', 'step': 21474, 'epoch': 3} {'type': 'loss', 'content': 0.08543974906206131, 'timestamp': '2025-09-30 22:38:47.906799', 'step': 21475, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:47.938903', 'step': 21475, 'epoch': 3} {'type': 'loss', 'content': 0.13389140367507935, 'timestamp': '2025-09-30 22:38:47.965748', 'step': 21476, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:48.001288', 'step': 21476, 'epoch': 3} {'type': 'loss', 'content': 0.060787588357925415, 'timestamp': '2025-09-30 22:38:48.006329', 'step': 21477, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:48.039746', 'step': 21477, 'epoch': 3} {'type': 'loss', 'content': 0.03782399371266365, 'timestamp': '2025-09-30 22:38:48.044159', 'step': 21478, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:48.077222', 'step': 21478, 'epoch': 3} {'type': 'loss', 'content': 0.07529868930578232, 'timestamp': '2025-09-30 22:38:48.081368', 'step': 21479, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.113520', 'step': 21479, 'epoch': 3} {'type': 'loss', 'content': 0.041205648332834244, 'timestamp': '2025-09-30 22:38:48.139727', 'step': 21480, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.172803', 'step': 21480, 'epoch': 3} {'type': 'loss', 'content': 0.043437469750642776, 'timestamp': '2025-09-30 22:38:48.176747', 'step': 21481, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.208435', 'step': 21481, 'epoch': 3} {'type': 'loss', 'content': 0.03828340396285057, 'timestamp': '2025-09-30 22:38:48.214392', 'step': 21482, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.247323', 'step': 21482, 'epoch': 3} {'type': 'loss', 'content': 0.06214149668812752, 'timestamp': '2025-09-30 22:38:48.251480', 'step': 21483, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:48.283498', 'step': 21483, 'epoch': 3} {'type': 'loss', 'content': 0.08827139437198639, 'timestamp': '2025-09-30 22:38:48.309106', 'step': 21484, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:48.341588', 'step': 21484, 'epoch': 3} {'type': 'loss', 'content': 0.053829971700906754, 'timestamp': '2025-09-30 22:38:48.350455', 'step': 21485, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.387025', 'step': 21485, 'epoch': 3} {'type': 'loss', 'content': 0.07137270271778107, 'timestamp': '2025-09-30 22:38:48.389888', 'step': 21486, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.420846', 'step': 21486, 'epoch': 3} {'type': 'loss', 'content': 0.047662701457738876, 'timestamp': '2025-09-30 22:38:48.427345', 'step': 21487, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.461508', 'step': 21487, 'epoch': 3} {'type': 'loss', 'content': 0.07164991647005081, 'timestamp': '2025-09-30 22:38:48.496950', 'step': 21488, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.529413', 'step': 21488, 'epoch': 3} {'type': 'loss', 'content': 0.09365511685609818, 'timestamp': '2025-09-30 22:38:48.533620', 'step': 21489, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:48.566055', 'step': 21489, 'epoch': 3} {'type': 'loss', 'content': 0.08528529107570648, 'timestamp': '2025-09-30 22:38:48.569477', 'step': 21490, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:48.602151', 'step': 21490, 'epoch': 3} {'type': 'loss', 'content': 0.0576825849711895, 'timestamp': '2025-09-30 22:38:48.615708', 'step': 21491, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:48.657873', 'step': 21491, 'epoch': 3} {'type': 'loss', 'content': 0.05942586809396744, 'timestamp': '2025-09-30 22:38:48.691225', 'step': 21492, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.733529', 'step': 21492, 'epoch': 3} {'type': 'loss', 'content': 0.029918035492300987, 'timestamp': '2025-09-30 22:38:48.749480', 'step': 21493, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:48.782335', 'step': 21493, 'epoch': 3} {'type': 'loss', 'content': 0.041837941855192184, 'timestamp': '2025-09-30 22:38:48.786611', 'step': 21494, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.819928', 'step': 21494, 'epoch': 3} {'type': 'loss', 'content': 0.07087843120098114, 'timestamp': '2025-09-30 22:38:48.824084', 'step': 21495, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:48.857270', 'step': 21495, 'epoch': 3} {'type': 'loss', 'content': 0.062385864555835724, 'timestamp': '2025-09-30 22:38:48.883850', 'step': 21496, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:48.914041', 'step': 21496, 'epoch': 3} {'type': 'loss', 'content': 0.06115731596946716, 'timestamp': '2025-09-30 22:38:48.916979', 'step': 21497, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:48.948962', 'step': 21497, 'epoch': 3} {'type': 'loss', 'content': 0.05289476737380028, 'timestamp': '2025-09-30 22:38:48.961276', 'step': 21498, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:49.005017', 'step': 21498, 'epoch': 3} {'type': 'loss', 'content': 0.032323092222213745, 'timestamp': '2025-09-30 22:38:49.009356', 'step': 21499, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:49.042102', 'step': 21499, 'epoch': 3} {'type': 'loss', 'content': 0.1256335973739624, 'timestamp': '2025-09-30 22:38:49.068998', 'step': 21500, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 21500', 'timestamp': '2025-09-30 22:38:54.057376', 'step': 21500, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:54.097857', 'step': 21500, 'epoch': 3} {'type': 'loss', 'content': 0.1303105503320694, 'timestamp': '2025-09-30 22:38:54.102295', 'step': 21501, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:54.144698', 'step': 21501, 'epoch': 3} {'type': 'loss', 'content': 0.04371168091893196, 'timestamp': '2025-09-30 22:38:54.156028', 'step': 21502, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:54.189317', 'step': 21502, 'epoch': 3} {'type': 'loss', 'content': 0.029007036238908768, 'timestamp': '2025-09-30 22:38:54.201644', 'step': 21503, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:54.235261', 'step': 21503, 'epoch': 3} {'type': 'loss', 'content': 0.04300781711935997, 'timestamp': '2025-09-30 22:38:54.261124', 'step': 21504, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:54.293871', 'step': 21504, 'epoch': 3} {'type': 'loss', 'content': 0.11798176914453506, 'timestamp': '2025-09-30 22:38:54.298028', 'step': 21505, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:54.328747', 'step': 21505, 'epoch': 3} {'type': 'loss', 'content': 0.010708211921155453, 'timestamp': '2025-09-30 22:38:54.331782', 'step': 21506, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:54.362833', 'step': 21506, 'epoch': 3} {'type': 'loss', 'content': 0.07325964421033859, 'timestamp': '2025-09-30 22:38:54.367354', 'step': 21507, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:54.410515', 'step': 21507, 'epoch': 3} {'type': 'loss', 'content': 0.061566732823848724, 'timestamp': '2025-09-30 22:38:54.439741', 'step': 21508, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:54.471582', 'step': 21508, 'epoch': 3} {'type': 'loss', 'content': 0.0645207017660141, 'timestamp': '2025-09-30 22:38:54.478413', 'step': 21509, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:54.510798', 'step': 21509, 'epoch': 3} {'type': 'loss', 'content': 0.0839347317814827, 'timestamp': '2025-09-30 22:38:54.515628', 'step': 21510, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:54.548367', 'step': 21510, 'epoch': 3} {'type': 'loss', 'content': 0.06810910999774933, 'timestamp': '2025-09-30 22:38:54.553670', 'step': 21511, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:54.598804', 'step': 21511, 'epoch': 3} {'type': 'loss', 'content': 0.12584643065929413, 'timestamp': '2025-09-30 22:38:54.626304', 'step': 21512, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:54.664848', 'step': 21512, 'epoch': 3} {'type': 'loss', 'content': 0.043988507241010666, 'timestamp': '2025-09-30 22:38:54.677233', 'step': 21513, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:54.718295', 'step': 21513, 'epoch': 3} {'type': 'loss', 'content': 0.07887580245733261, 'timestamp': '2025-09-30 22:38:54.724986', 'step': 21514, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:54.756722', 'step': 21514, 'epoch': 3} {'type': 'loss', 'content': 0.1089312806725502, 'timestamp': '2025-09-30 22:38:54.761193', 'step': 21515, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:54.804786', 'step': 21515, 'epoch': 3} {'type': 'loss', 'content': 0.08050380647182465, 'timestamp': '2025-09-30 22:38:54.831272', 'step': 21516, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:54.862706', 'step': 21516, 'epoch': 3} {'type': 'loss', 'content': 0.04530598968267441, 'timestamp': '2025-09-30 22:38:54.865546', 'step': 21517, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:54.904401', 'step': 21517, 'epoch': 3} {'type': 'loss', 'content': 0.012059577740728855, 'timestamp': '2025-09-30 22:38:54.916035', 'step': 21518, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:54.946668', 'step': 21518, 'epoch': 3} {'type': 'loss', 'content': 0.03197585046291351, 'timestamp': '2025-09-30 22:38:54.951515', 'step': 21519, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:54.983528', 'step': 21519, 'epoch': 3} {'type': 'loss', 'content': 0.03256869688630104, 'timestamp': '2025-09-30 22:38:55.018690', 'step': 21520, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:55.052292', 'step': 21520, 'epoch': 3} {'type': 'loss', 'content': 0.03119419515132904, 'timestamp': '2025-09-30 22:38:55.056511', 'step': 21521, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.090887', 'step': 21521, 'epoch': 3} {'type': 'loss', 'content': 0.07539226114749908, 'timestamp': '2025-09-30 22:38:55.093880', 'step': 21522, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:55.135179', 'step': 21522, 'epoch': 3} {'type': 'loss', 'content': 0.08376152068376541, 'timestamp': '2025-09-30 22:38:55.141410', 'step': 21523, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.173259', 'step': 21523, 'epoch': 3} {'type': 'loss', 'content': 0.11732490360736847, 'timestamp': '2025-09-30 22:38:55.200747', 'step': 21524, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:55.231602', 'step': 21524, 'epoch': 3} {'type': 'loss', 'content': 0.029917526990175247, 'timestamp': '2025-09-30 22:38:55.237148', 'step': 21525, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.278334', 'step': 21525, 'epoch': 3} {'type': 'loss', 'content': 0.02188079059123993, 'timestamp': '2025-09-30 22:38:55.282445', 'step': 21526, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:55.315279', 'step': 21526, 'epoch': 3} {'type': 'loss', 'content': 0.013564660213887691, 'timestamp': '2025-09-30 22:38:55.326761', 'step': 21527, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.360678', 'step': 21527, 'epoch': 3} {'type': 'loss', 'content': 0.07155714184045792, 'timestamp': '2025-09-30 22:38:55.385707', 'step': 21528, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:55.416757', 'step': 21528, 'epoch': 3} {'type': 'loss', 'content': 0.020881179720163345, 'timestamp': '2025-09-30 22:38:55.421379', 'step': 21529, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:55.452379', 'step': 21529, 'epoch': 3} {'type': 'loss', 'content': 0.08668175339698792, 'timestamp': '2025-09-30 22:38:55.456110', 'step': 21530, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:55.495374', 'step': 21530, 'epoch': 3} {'type': 'loss', 'content': 0.037456270307302475, 'timestamp': '2025-09-30 22:38:55.500501', 'step': 21531, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.546140', 'step': 21531, 'epoch': 3} {'type': 'loss', 'content': 0.04316055402159691, 'timestamp': '2025-09-30 22:38:55.572074', 'step': 21532, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.612959', 'step': 21532, 'epoch': 3} {'type': 'loss', 'content': 0.056361667811870575, 'timestamp': '2025-09-30 22:38:55.623436', 'step': 21533, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.655760', 'step': 21533, 'epoch': 3} {'type': 'loss', 'content': 0.08942492306232452, 'timestamp': '2025-09-30 22:38:55.667003', 'step': 21534, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.701015', 'step': 21534, 'epoch': 3} {'type': 'loss', 'content': 0.03620438650250435, 'timestamp': '2025-09-30 22:38:55.713798', 'step': 21535, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.754792', 'step': 21535, 'epoch': 3} {'type': 'loss', 'content': 0.10610916465520859, 'timestamp': '2025-09-30 22:38:55.781222', 'step': 21536, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:55.814365', 'step': 21536, 'epoch': 3} {'type': 'loss', 'content': 0.07393430173397064, 'timestamp': '2025-09-30 22:38:55.821167', 'step': 21537, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:55.863471', 'step': 21537, 'epoch': 3} {'type': 'loss', 'content': 0.0033583752810955048, 'timestamp': '2025-09-30 22:38:55.867466', 'step': 21538, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.900538', 'step': 21538, 'epoch': 3} {'type': 'loss', 'content': 0.06607316434383392, 'timestamp': '2025-09-30 22:38:55.915525', 'step': 21539, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:55.955015', 'step': 21539, 'epoch': 3} {'type': 'loss', 'content': 0.017447112128138542, 'timestamp': '2025-09-30 22:38:55.981633', 'step': 21540, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.012978', 'step': 21540, 'epoch': 3} {'type': 'loss', 'content': 0.07188694179058075, 'timestamp': '2025-09-30 22:38:56.018384', 'step': 21541, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:56.050846', 'step': 21541, 'epoch': 3} {'type': 'loss', 'content': 0.07162324339151382, 'timestamp': '2025-09-30 22:38:56.054456', 'step': 21542, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.086151', 'step': 21542, 'epoch': 3} {'type': 'loss', 'content': 0.09488676488399506, 'timestamp': '2025-09-30 22:38:56.102847', 'step': 21543, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.134592', 'step': 21543, 'epoch': 3} {'type': 'loss', 'content': 0.07697170972824097, 'timestamp': '2025-09-30 22:38:56.160822', 'step': 21544, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:56.192080', 'step': 21544, 'epoch': 3} {'type': 'loss', 'content': 0.1308334916830063, 'timestamp': '2025-09-30 22:38:56.205590', 'step': 21545, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.245965', 'step': 21545, 'epoch': 3} {'type': 'loss', 'content': 0.04949226230382919, 'timestamp': '2025-09-30 22:38:56.249249', 'step': 21546, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.284100', 'step': 21546, 'epoch': 3} {'type': 'loss', 'content': 0.0179849062114954, 'timestamp': '2025-09-30 22:38:56.289973', 'step': 21547, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.322441', 'step': 21547, 'epoch': 3} {'type': 'loss', 'content': 0.06582742929458618, 'timestamp': '2025-09-30 22:38:56.347547', 'step': 21548, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.385089', 'step': 21548, 'epoch': 3} {'type': 'loss', 'content': 0.1093667522072792, 'timestamp': '2025-09-30 22:38:56.389687', 'step': 21549, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.421753', 'step': 21549, 'epoch': 3} {'type': 'loss', 'content': 0.10270316898822784, 'timestamp': '2025-09-30 22:38:56.425711', 'step': 21550, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.459196', 'step': 21550, 'epoch': 3} {'type': 'loss', 'content': 0.07017114758491516, 'timestamp': '2025-09-30 22:38:56.465776', 'step': 21551, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.500827', 'step': 21551, 'epoch': 3} {'type': 'loss', 'content': 0.03249949961900711, 'timestamp': '2025-09-30 22:38:56.529057', 'step': 21552, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.562571', 'step': 21552, 'epoch': 3} {'type': 'loss', 'content': 0.04823797941207886, 'timestamp': '2025-09-30 22:38:56.567100', 'step': 21553, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:56.602901', 'step': 21553, 'epoch': 3} {'type': 'loss', 'content': 0.03194060176610947, 'timestamp': '2025-09-30 22:38:56.607057', 'step': 21554, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.640324', 'step': 21554, 'epoch': 3} {'type': 'loss', 'content': 0.06859090924263, 'timestamp': '2025-09-30 22:38:56.650502', 'step': 21555, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.685755', 'step': 21555, 'epoch': 3} {'type': 'loss', 'content': 0.0529981330037117, 'timestamp': '2025-09-30 22:38:56.714818', 'step': 21556, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.751829', 'step': 21556, 'epoch': 3} {'type': 'loss', 'content': 0.027467289939522743, 'timestamp': '2025-09-30 22:38:56.755606', 'step': 21557, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:56.788476', 'step': 21557, 'epoch': 3} {'type': 'loss', 'content': 0.09105489403009415, 'timestamp': '2025-09-30 22:38:56.793488', 'step': 21558, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:56.829439', 'step': 21558, 'epoch': 3} {'type': 'loss', 'content': 0.06255510449409485, 'timestamp': '2025-09-30 22:38:56.833658', 'step': 21559, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:56.885707', 'step': 21559, 'epoch': 3} {'type': 'loss', 'content': 0.1401566118001938, 'timestamp': '2025-09-30 22:38:56.912433', 'step': 21560, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:56.943864', 'step': 21560, 'epoch': 3} {'type': 'loss', 'content': 0.06807693839073181, 'timestamp': '2025-09-30 22:38:56.950525', 'step': 21561, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:56.982682', 'step': 21561, 'epoch': 3} {'type': 'loss', 'content': 0.018663836643099785, 'timestamp': '2025-09-30 22:38:56.986433', 'step': 21562, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:57.026719', 'step': 21562, 'epoch': 3} {'type': 'loss', 'content': 0.1258435994386673, 'timestamp': '2025-09-30 22:38:57.044494', 'step': 21563, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:57.083680', 'step': 21563, 'epoch': 3} {'type': 'loss', 'content': 0.08555872738361359, 'timestamp': '2025-09-30 22:38:57.108485', 'step': 21564, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.139955', 'step': 21564, 'epoch': 3} {'type': 'loss', 'content': 0.047551121562719345, 'timestamp': '2025-09-30 22:38:57.146249', 'step': 21565, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.179170', 'step': 21565, 'epoch': 3} {'type': 'loss', 'content': 0.029552211984992027, 'timestamp': '2025-09-30 22:38:57.183312', 'step': 21566, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:57.216473', 'step': 21566, 'epoch': 3} {'type': 'loss', 'content': 0.009555820375680923, 'timestamp': '2025-09-30 22:38:57.234089', 'step': 21567, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.278400', 'step': 21567, 'epoch': 3} {'type': 'loss', 'content': 0.03800752013921738, 'timestamp': '2025-09-30 22:38:57.305584', 'step': 21568, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:38:57.337560', 'step': 21568, 'epoch': 3} {'type': 'loss', 'content': 0.12514720857143402, 'timestamp': '2025-09-30 22:38:57.344994', 'step': 21569, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:57.382580', 'step': 21569, 'epoch': 3} {'type': 'loss', 'content': 0.023472268134355545, 'timestamp': '2025-09-30 22:38:57.388693', 'step': 21570, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.421347', 'step': 21570, 'epoch': 3} {'type': 'loss', 'content': 0.029829515144228935, 'timestamp': '2025-09-30 22:38:57.428237', 'step': 21571, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:57.461781', 'step': 21571, 'epoch': 3} {'type': 'loss', 'content': 0.05992036312818527, 'timestamp': '2025-09-30 22:38:57.486096', 'step': 21572, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:57.529504', 'step': 21572, 'epoch': 3} {'type': 'loss', 'content': 0.054657574743032455, 'timestamp': '2025-09-30 22:38:57.532508', 'step': 21573, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:57.565339', 'step': 21573, 'epoch': 3} {'type': 'loss', 'content': 0.05931316316127777, 'timestamp': '2025-09-30 22:38:57.577955', 'step': 21574, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.611233', 'step': 21574, 'epoch': 3} {'type': 'loss', 'content': 0.09552459418773651, 'timestamp': '2025-09-30 22:38:57.616829', 'step': 21575, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:57.650055', 'step': 21575, 'epoch': 3} {'type': 'loss', 'content': 0.0364886112511158, 'timestamp': '2025-09-30 22:38:57.676570', 'step': 21576, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:57.724261', 'step': 21576, 'epoch': 3} {'type': 'loss', 'content': 0.06795641034841537, 'timestamp': '2025-09-30 22:38:57.728243', 'step': 21577, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:57.760734', 'step': 21577, 'epoch': 3} {'type': 'loss', 'content': 0.10668390989303589, 'timestamp': '2025-09-30 22:38:57.764573', 'step': 21578, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.805591', 'step': 21578, 'epoch': 3} {'type': 'loss', 'content': 0.021847963333129883, 'timestamp': '2025-09-30 22:38:57.815488', 'step': 21579, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:57.849476', 'step': 21579, 'epoch': 3} {'type': 'loss', 'content': 0.12492389976978302, 'timestamp': '2025-09-30 22:38:57.877691', 'step': 21580, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:57.910573', 'step': 21580, 'epoch': 3} {'type': 'loss', 'content': 0.017887931317090988, 'timestamp': '2025-09-30 22:38:57.925142', 'step': 21581, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:57.965302', 'step': 21581, 'epoch': 3} {'type': 'loss', 'content': 0.04636222496628761, 'timestamp': '2025-09-30 22:38:57.969742', 'step': 21582, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:58.002005', 'step': 21582, 'epoch': 3} {'type': 'loss', 'content': 0.024556346237659454, 'timestamp': '2025-09-30 22:38:58.011298', 'step': 21583, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.047685', 'step': 21583, 'epoch': 3} {'type': 'loss', 'content': 0.04616961255669594, 'timestamp': '2025-09-30 22:38:58.072060', 'step': 21584, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:58.104594', 'step': 21584, 'epoch': 3} {'type': 'loss', 'content': 0.02450251765549183, 'timestamp': '2025-09-30 22:38:58.107570', 'step': 21585, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.138762', 'step': 21585, 'epoch': 3} {'type': 'loss', 'content': 0.01981598325073719, 'timestamp': '2025-09-30 22:38:58.143788', 'step': 21586, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.175048', 'step': 21586, 'epoch': 3} {'type': 'loss', 'content': 0.13900725543498993, 'timestamp': '2025-09-30 22:38:58.178774', 'step': 21587, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.208903', 'step': 21587, 'epoch': 3} {'type': 'loss', 'content': 0.03466024249792099, 'timestamp': '2025-09-30 22:38:58.236576', 'step': 21588, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.276746', 'step': 21588, 'epoch': 3} {'type': 'loss', 'content': 0.0322304368019104, 'timestamp': '2025-09-30 22:38:58.287967', 'step': 21589, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.320119', 'step': 21589, 'epoch': 3} {'type': 'loss', 'content': 0.04834887385368347, 'timestamp': '2025-09-30 22:38:58.331091', 'step': 21590, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:58.361474', 'step': 21590, 'epoch': 3} {'type': 'loss', 'content': 0.01070171408355236, 'timestamp': '2025-09-30 22:38:58.367163', 'step': 21591, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.399982', 'step': 21591, 'epoch': 3} {'type': 'loss', 'content': 0.02272782102227211, 'timestamp': '2025-09-30 22:38:58.424781', 'step': 21592, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.459936', 'step': 21592, 'epoch': 3} {'type': 'loss', 'content': 0.030793607234954834, 'timestamp': '2025-09-30 22:38:58.463391', 'step': 21593, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.502382', 'step': 21593, 'epoch': 3} {'type': 'loss', 'content': 0.11076963692903519, 'timestamp': '2025-09-30 22:38:58.508088', 'step': 21594, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.558417', 'step': 21594, 'epoch': 3} {'type': 'loss', 'content': 0.10776076465845108, 'timestamp': '2025-09-30 22:38:58.566503', 'step': 21595, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.600478', 'step': 21595, 'epoch': 3} {'type': 'loss', 'content': 0.02463608980178833, 'timestamp': '2025-09-30 22:38:58.625725', 'step': 21596, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:58.657662', 'step': 21596, 'epoch': 3} {'type': 'loss', 'content': 0.03880291432142258, 'timestamp': '2025-09-30 22:38:58.662800', 'step': 21597, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.694410', 'step': 21597, 'epoch': 3} {'type': 'loss', 'content': 0.03937555104494095, 'timestamp': '2025-09-30 22:38:58.697274', 'step': 21598, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.727392', 'step': 21598, 'epoch': 3} {'type': 'loss', 'content': 0.09444107860326767, 'timestamp': '2025-09-30 22:38:58.732041', 'step': 21599, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:58.767743', 'step': 21599, 'epoch': 3} {'type': 'loss', 'content': 0.06314583867788315, 'timestamp': '2025-09-30 22:38:58.794164', 'step': 21600, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.827026', 'step': 21600, 'epoch': 3} {'type': 'loss', 'content': 0.018526487052440643, 'timestamp': '2025-09-30 22:38:58.830039', 'step': 21601, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:58.861443', 'step': 21601, 'epoch': 3} {'type': 'loss', 'content': 0.05332876369357109, 'timestamp': '2025-09-30 22:38:58.863959', 'step': 21602, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:58.901097', 'step': 21602, 'epoch': 3} {'type': 'loss', 'content': 0.06136147305369377, 'timestamp': '2025-09-30 22:38:58.910925', 'step': 21603, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:58.947871', 'step': 21603, 'epoch': 3} {'type': 'loss', 'content': 0.0593198798596859, 'timestamp': '2025-09-30 22:38:58.973147', 'step': 21604, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:59.004299', 'step': 21604, 'epoch': 3} {'type': 'loss', 'content': 0.06607349961996078, 'timestamp': '2025-09-30 22:38:59.009067', 'step': 21605, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.039932', 'step': 21605, 'epoch': 3} {'type': 'loss', 'content': 0.04144396632909775, 'timestamp': '2025-09-30 22:38:59.045293', 'step': 21606, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.076116', 'step': 21606, 'epoch': 3} {'type': 'loss', 'content': 0.0166423711925745, 'timestamp': '2025-09-30 22:38:59.080393', 'step': 21607, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.112896', 'step': 21607, 'epoch': 3} {'type': 'loss', 'content': 0.10260692983865738, 'timestamp': '2025-09-30 22:38:59.143042', 'step': 21608, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.174947', 'step': 21608, 'epoch': 3} {'type': 'loss', 'content': 0.10568077862262726, 'timestamp': '2025-09-30 22:38:59.180879', 'step': 21609, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:59.213631', 'step': 21609, 'epoch': 3} {'type': 'loss', 'content': 0.11644014716148376, 'timestamp': '2025-09-30 22:38:59.218158', 'step': 21610, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:59.257689', 'step': 21610, 'epoch': 3} {'type': 'loss', 'content': 0.042262885719537735, 'timestamp': '2025-09-30 22:38:59.261587', 'step': 21611, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.302157', 'step': 21611, 'epoch': 3} {'type': 'loss', 'content': 0.02213943935930729, 'timestamp': '2025-09-30 22:38:59.327702', 'step': 21612, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.361058', 'step': 21612, 'epoch': 3} {'type': 'loss', 'content': 0.08619862049818039, 'timestamp': '2025-09-30 22:38:59.365545', 'step': 21613, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:59.396348', 'step': 21613, 'epoch': 3} {'type': 'loss', 'content': 0.0631452351808548, 'timestamp': '2025-09-30 22:38:59.402247', 'step': 21614, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:59.433852', 'step': 21614, 'epoch': 3} {'type': 'loss', 'content': 0.04121889919042587, 'timestamp': '2025-09-30 22:38:59.439735', 'step': 21615, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:38:59.470846', 'step': 21615, 'epoch': 3} {'type': 'loss', 'content': 0.05148135498166084, 'timestamp': '2025-09-30 22:38:59.496865', 'step': 21616, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:59.533274', 'step': 21616, 'epoch': 3} {'type': 'loss', 'content': 0.056216780096292496, 'timestamp': '2025-09-30 22:38:59.537223', 'step': 21617, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:59.568866', 'step': 21617, 'epoch': 3} {'type': 'loss', 'content': 0.07527776807546616, 'timestamp': '2025-09-30 22:38:59.578223', 'step': 21618, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:59.614807', 'step': 21618, 'epoch': 3} {'type': 'loss', 'content': 0.07626187801361084, 'timestamp': '2025-09-30 22:38:59.622936', 'step': 21619, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:59.660415', 'step': 21619, 'epoch': 3} {'type': 'loss', 'content': 0.02314440719783306, 'timestamp': '2025-09-30 22:38:59.684808', 'step': 21620, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:59.719411', 'step': 21620, 'epoch': 3} {'type': 'loss', 'content': 0.057687874883413315, 'timestamp': '2025-09-30 22:38:59.722646', 'step': 21621, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:38:59.754015', 'step': 21621, 'epoch': 3} {'type': 'loss', 'content': 0.0692378506064415, 'timestamp': '2025-09-30 22:38:59.759217', 'step': 21622, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:38:59.791351', 'step': 21622, 'epoch': 3} {'type': 'loss', 'content': 0.05791693180799484, 'timestamp': '2025-09-30 22:38:59.794063', 'step': 21623, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:38:59.826614', 'step': 21623, 'epoch': 3} {'type': 'loss', 'content': 0.025141118094325066, 'timestamp': '2025-09-30 22:38:59.853360', 'step': 21624, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:59.894941', 'step': 21624, 'epoch': 3} {'type': 'loss', 'content': 0.1084945872426033, 'timestamp': '2025-09-30 22:38:59.898637', 'step': 21625, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:38:59.942982', 'step': 21625, 'epoch': 3} {'type': 'loss', 'content': 0.08355272561311722, 'timestamp': '2025-09-30 22:38:59.948315', 'step': 21626, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:38:59.979567', 'step': 21626, 'epoch': 3} {'type': 'loss', 'content': 0.07658141851425171, 'timestamp': '2025-09-30 22:38:59.982955', 'step': 21627, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:00.023800', 'step': 21627, 'epoch': 3} {'type': 'loss', 'content': 0.06181252375245094, 'timestamp': '2025-09-30 22:39:00.048513', 'step': 21628, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:00.087571', 'step': 21628, 'epoch': 3} {'type': 'loss', 'content': 0.0279945507645607, 'timestamp': '2025-09-30 22:39:00.091569', 'step': 21629, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:00.122641', 'step': 21629, 'epoch': 3} {'type': 'loss', 'content': 0.046600405126810074, 'timestamp': '2025-09-30 22:39:00.126832', 'step': 21630, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:00.160019', 'step': 21630, 'epoch': 3} {'type': 'loss', 'content': 0.1459069401025772, 'timestamp': '2025-09-30 22:39:00.163757', 'step': 21631, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:00.195328', 'step': 21631, 'epoch': 3} {'type': 'loss', 'content': 0.06561344116926193, 'timestamp': '2025-09-30 22:39:00.221757', 'step': 21632, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:00.261188', 'step': 21632, 'epoch': 3} {'type': 'loss', 'content': 0.031497225165367126, 'timestamp': '2025-09-30 22:39:00.265266', 'step': 21633, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:00.297157', 'step': 21633, 'epoch': 3} {'type': 'loss', 'content': 0.10434187203645706, 'timestamp': '2025-09-30 22:39:00.309418', 'step': 21634, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:00.344097', 'step': 21634, 'epoch': 3} {'type': 'loss', 'content': 0.04531608521938324, 'timestamp': '2025-09-30 22:39:00.347029', 'step': 21635, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:00.383019', 'step': 21635, 'epoch': 3} {'type': 'loss', 'content': 0.07524796575307846, 'timestamp': '2025-09-30 22:39:00.415878', 'step': 21636, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 224], 'flops': 6644714900096}, 'timestamp': '2025-09-30 22:39:00.454880', 'step': 21636, 'epoch': 3} {'type': 'loss', 'content': 0.06312736123800278, 'timestamp': '2025-09-30 22:39:00.459895', 'step': 21637, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:00.493585', 'step': 21637, 'epoch': 3} {'type': 'loss', 'content': 0.05414354428648949, 'timestamp': '2025-09-30 22:39:00.498473', 'step': 21638, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:00.531417', 'step': 21638, 'epoch': 3} {'type': 'loss', 'content': 0.06841973960399628, 'timestamp': '2025-09-30 22:39:00.544416', 'step': 21639, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:00.583883', 'step': 21639, 'epoch': 3} {'type': 'loss', 'content': 0.06192789971828461, 'timestamp': '2025-09-30 22:39:00.609358', 'step': 21640, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:00.650108', 'step': 21640, 'epoch': 3} {'type': 'loss', 'content': 0.055173225700855255, 'timestamp': '2025-09-30 22:39:00.654010', 'step': 21641, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:00.687697', 'step': 21641, 'epoch': 3} {'type': 'loss', 'content': 0.06052275374531746, 'timestamp': '2025-09-30 22:39:00.691914', 'step': 21642, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:00.727250', 'step': 21642, 'epoch': 3} {'type': 'loss', 'content': 0.04761373624205589, 'timestamp': '2025-09-30 22:39:00.730585', 'step': 21643, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:00.761643', 'step': 21643, 'epoch': 3} {'type': 'loss', 'content': 0.04160602018237114, 'timestamp': '2025-09-30 22:39:00.787179', 'step': 21644, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:00.831220', 'step': 21644, 'epoch': 3} {'type': 'loss', 'content': 0.016252996399998665, 'timestamp': '2025-09-30 22:39:00.841000', 'step': 21645, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:00.881001', 'step': 21645, 'epoch': 3} {'type': 'loss', 'content': 0.022310251370072365, 'timestamp': '2025-09-30 22:39:00.884560', 'step': 21646, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:00.917270', 'step': 21646, 'epoch': 3} {'type': 'loss', 'content': 0.040926892310380936, 'timestamp': '2025-09-30 22:39:00.921804', 'step': 21647, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:00.954938', 'step': 21647, 'epoch': 3} {'type': 'loss', 'content': 0.07151499390602112, 'timestamp': '2025-09-30 22:39:00.979689', 'step': 21648, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.011767', 'step': 21648, 'epoch': 3} {'type': 'loss', 'content': 0.06726597994565964, 'timestamp': '2025-09-30 22:39:01.015304', 'step': 21649, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:01.055775', 'step': 21649, 'epoch': 3} {'type': 'loss', 'content': 0.08739764988422394, 'timestamp': '2025-09-30 22:39:01.060286', 'step': 21650, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.092339', 'step': 21650, 'epoch': 3} {'type': 'loss', 'content': 0.0652729719877243, 'timestamp': '2025-09-30 22:39:01.096307', 'step': 21651, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:01.128911', 'step': 21651, 'epoch': 3} {'type': 'loss', 'content': 0.08147221803665161, 'timestamp': '2025-09-30 22:39:01.154815', 'step': 21652, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:01.188167', 'step': 21652, 'epoch': 3} {'type': 'loss', 'content': 0.01565689407289028, 'timestamp': '2025-09-30 22:39:01.192557', 'step': 21653, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.226481', 'step': 21653, 'epoch': 3} {'type': 'loss', 'content': 0.009883991442620754, 'timestamp': '2025-09-30 22:39:01.238470', 'step': 21654, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.283059', 'step': 21654, 'epoch': 3} {'type': 'loss', 'content': 0.01416166964918375, 'timestamp': '2025-09-30 22:39:01.286133', 'step': 21655, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.330665', 'step': 21655, 'epoch': 3} {'type': 'loss', 'content': 0.07023942470550537, 'timestamp': '2025-09-30 22:39:01.362693', 'step': 21656, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.395418', 'step': 21656, 'epoch': 3} {'type': 'loss', 'content': 0.07096844911575317, 'timestamp': '2025-09-30 22:39:01.401137', 'step': 21657, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.431163', 'step': 21657, 'epoch': 3} {'type': 'loss', 'content': 0.11472511291503906, 'timestamp': '2025-09-30 22:39:01.434599', 'step': 21658, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:01.465935', 'step': 21658, 'epoch': 3} {'type': 'loss', 'content': 0.027299819514155388, 'timestamp': '2025-09-30 22:39:01.471636', 'step': 21659, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.503697', 'step': 21659, 'epoch': 3} {'type': 'loss', 'content': 0.0804828330874443, 'timestamp': '2025-09-30 22:39:01.529939', 'step': 21660, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.562866', 'step': 21660, 'epoch': 3} {'type': 'loss', 'content': 0.17162179946899414, 'timestamp': '2025-09-30 22:39:01.575438', 'step': 21661, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:01.614708', 'step': 21661, 'epoch': 3} {'type': 'loss', 'content': 0.042865488678216934, 'timestamp': '2025-09-30 22:39:01.618065', 'step': 21662, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.650210', 'step': 21662, 'epoch': 3} {'type': 'loss', 'content': 0.08049064129590988, 'timestamp': '2025-09-30 22:39:01.654343', 'step': 21663, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.685978', 'step': 21663, 'epoch': 3} {'type': 'loss', 'content': 0.0476052388548851, 'timestamp': '2025-09-30 22:39:01.710594', 'step': 21664, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.744162', 'step': 21664, 'epoch': 3} {'type': 'loss', 'content': 0.041413888335227966, 'timestamp': '2025-09-30 22:39:01.758804', 'step': 21665, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.798482', 'step': 21665, 'epoch': 3} {'type': 'loss', 'content': 0.0638262927532196, 'timestamp': '2025-09-30 22:39:01.801985', 'step': 21666, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.832610', 'step': 21666, 'epoch': 3} {'type': 'loss', 'content': 0.08403270691633224, 'timestamp': '2025-09-30 22:39:01.837177', 'step': 21667, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.869622', 'step': 21667, 'epoch': 3} {'type': 'loss', 'content': 0.0734657272696495, 'timestamp': '2025-09-30 22:39:01.896714', 'step': 21668, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:01.933291', 'step': 21668, 'epoch': 3} {'type': 'loss', 'content': 0.037433408200740814, 'timestamp': '2025-09-30 22:39:01.946108', 'step': 21669, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:01.987569', 'step': 21669, 'epoch': 3} {'type': 'loss', 'content': 0.07117224484682083, 'timestamp': '2025-09-30 22:39:02.003395', 'step': 21670, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.047381', 'step': 21670, 'epoch': 3} {'type': 'loss', 'content': 0.013805166818201542, 'timestamp': '2025-09-30 22:39:02.051635', 'step': 21671, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.083334', 'step': 21671, 'epoch': 3} {'type': 'loss', 'content': 0.06272317469120026, 'timestamp': '2025-09-30 22:39:02.111684', 'step': 21672, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:02.144676', 'step': 21672, 'epoch': 3} {'type': 'loss', 'content': 0.05880819633603096, 'timestamp': '2025-09-30 22:39:02.149928', 'step': 21673, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:02.184630', 'step': 21673, 'epoch': 3} {'type': 'loss', 'content': 0.023486683145165443, 'timestamp': '2025-09-30 22:39:02.202273', 'step': 21674, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.245113', 'step': 21674, 'epoch': 3} {'type': 'loss', 'content': 0.02525276131927967, 'timestamp': '2025-09-30 22:39:02.250016', 'step': 21675, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:02.281300', 'step': 21675, 'epoch': 3} {'type': 'loss', 'content': 0.09707492589950562, 'timestamp': '2025-09-30 22:39:02.306367', 'step': 21676, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:02.345262', 'step': 21676, 'epoch': 3} {'type': 'loss', 'content': 0.06224570795893669, 'timestamp': '2025-09-30 22:39:02.351054', 'step': 21677, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:02.384714', 'step': 21677, 'epoch': 3} {'type': 'loss', 'content': 0.019197523593902588, 'timestamp': '2025-09-30 22:39:02.388769', 'step': 21678, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:02.421991', 'step': 21678, 'epoch': 3} {'type': 'loss', 'content': 0.06205670163035393, 'timestamp': '2025-09-30 22:39:02.426346', 'step': 21679, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:02.467075', 'step': 21679, 'epoch': 3} {'type': 'loss', 'content': 0.06067206710577011, 'timestamp': '2025-09-30 22:39:02.491867', 'step': 21680, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.523935', 'step': 21680, 'epoch': 3} {'type': 'loss', 'content': 0.10740912705659866, 'timestamp': '2025-09-30 22:39:02.528888', 'step': 21681, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:02.563858', 'step': 21681, 'epoch': 3} {'type': 'loss', 'content': 0.06034858524799347, 'timestamp': '2025-09-30 22:39:02.568103', 'step': 21682, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:02.600462', 'step': 21682, 'epoch': 3} {'type': 'loss', 'content': 0.028880322352051735, 'timestamp': '2025-09-30 22:39:02.606854', 'step': 21683, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.638513', 'step': 21683, 'epoch': 3} {'type': 'loss', 'content': 0.07252661138772964, 'timestamp': '2025-09-30 22:39:02.666239', 'step': 21684, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.703550', 'step': 21684, 'epoch': 3} {'type': 'loss', 'content': 0.1023988425731659, 'timestamp': '2025-09-30 22:39:02.707634', 'step': 21685, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:02.743719', 'step': 21685, 'epoch': 3} {'type': 'loss', 'content': 0.09051187336444855, 'timestamp': '2025-09-30 22:39:02.749422', 'step': 21686, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.783487', 'step': 21686, 'epoch': 3} {'type': 'loss', 'content': 0.07953258603811264, 'timestamp': '2025-09-30 22:39:02.789075', 'step': 21687, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.821841', 'step': 21687, 'epoch': 3} {'type': 'loss', 'content': 0.07150378078222275, 'timestamp': '2025-09-30 22:39:02.851095', 'step': 21688, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.885045', 'step': 21688, 'epoch': 3} {'type': 'loss', 'content': 0.04312886670231819, 'timestamp': '2025-09-30 22:39:02.902359', 'step': 21689, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:02.943018', 'step': 21689, 'epoch': 3} {'type': 'loss', 'content': 0.07067503035068512, 'timestamp': '2025-09-30 22:39:02.946547', 'step': 21690, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:02.988169', 'step': 21690, 'epoch': 3} {'type': 'loss', 'content': 0.050844691693782806, 'timestamp': '2025-09-30 22:39:02.999266', 'step': 21691, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:03.040068', 'step': 21691, 'epoch': 3} {'type': 'loss', 'content': 0.07300027459859848, 'timestamp': '2025-09-30 22:39:03.071155', 'step': 21692, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:03.111862', 'step': 21692, 'epoch': 3} {'type': 'loss', 'content': 0.04905674606561661, 'timestamp': '2025-09-30 22:39:03.124849', 'step': 21693, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.158467', 'step': 21693, 'epoch': 3} {'type': 'loss', 'content': 0.06492502987384796, 'timestamp': '2025-09-30 22:39:03.172614', 'step': 21694, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.207448', 'step': 21694, 'epoch': 3} {'type': 'loss', 'content': 0.026105161756277084, 'timestamp': '2025-09-30 22:39:03.219900', 'step': 21695, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.252481', 'step': 21695, 'epoch': 3} {'type': 'loss', 'content': 0.054548997431993484, 'timestamp': '2025-09-30 22:39:03.281608', 'step': 21696, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.316383', 'step': 21696, 'epoch': 3} {'type': 'loss', 'content': 0.04004000499844551, 'timestamp': '2025-09-30 22:39:03.319458', 'step': 21697, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.351688', 'step': 21697, 'epoch': 3} {'type': 'loss', 'content': 0.05417381227016449, 'timestamp': '2025-09-30 22:39:03.358406', 'step': 21698, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.395595', 'step': 21698, 'epoch': 3} {'type': 'loss', 'content': 0.06363927572965622, 'timestamp': '2025-09-30 22:39:03.398491', 'step': 21699, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.434710', 'step': 21699, 'epoch': 3} {'type': 'loss', 'content': 0.0723128393292427, 'timestamp': '2025-09-30 22:39:03.459639', 'step': 21700, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:03.502297', 'step': 21700, 'epoch': 3} {'type': 'loss', 'content': 0.011477562598884106, 'timestamp': '2025-09-30 22:39:03.507426', 'step': 21701, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:03.539394', 'step': 21701, 'epoch': 3} {'type': 'loss', 'content': 0.053388431668281555, 'timestamp': '2025-09-30 22:39:03.546058', 'step': 21702, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.578484', 'step': 21702, 'epoch': 3} {'type': 'loss', 'content': 0.13309204578399658, 'timestamp': '2025-09-30 22:39:03.587138', 'step': 21703, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.617763', 'step': 21703, 'epoch': 3} {'type': 'loss', 'content': 0.06081828847527504, 'timestamp': '2025-09-30 22:39:03.642646', 'step': 21704, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:03.688761', 'step': 21704, 'epoch': 3} {'type': 'loss', 'content': 0.037464745342731476, 'timestamp': '2025-09-30 22:39:03.691859', 'step': 21705, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:03.730466', 'step': 21705, 'epoch': 3} {'type': 'loss', 'content': 0.040040306746959686, 'timestamp': '2025-09-30 22:39:03.735445', 'step': 21706, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.767941', 'step': 21706, 'epoch': 3} {'type': 'loss', 'content': 0.09924409538507462, 'timestamp': '2025-09-30 22:39:03.775847', 'step': 21707, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.808777', 'step': 21707, 'epoch': 3} {'type': 'loss', 'content': 0.06236346438527107, 'timestamp': '2025-09-30 22:39:03.833562', 'step': 21708, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:03.866472', 'step': 21708, 'epoch': 3} {'type': 'loss', 'content': 0.03935699537396431, 'timestamp': '2025-09-30 22:39:03.869477', 'step': 21709, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.902413', 'step': 21709, 'epoch': 3} {'type': 'loss', 'content': 0.07934561371803284, 'timestamp': '2025-09-30 22:39:03.910025', 'step': 21710, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:03.943264', 'step': 21710, 'epoch': 3} {'type': 'loss', 'content': 0.1163136214017868, 'timestamp': '2025-09-30 22:39:03.948894', 'step': 21711, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:03.982482', 'step': 21711, 'epoch': 3} {'type': 'loss', 'content': 0.026733333244919777, 'timestamp': '2025-09-30 22:39:04.017110', 'step': 21712, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:04.047785', 'step': 21712, 'epoch': 3} {'type': 'loss', 'content': 0.08585221320390701, 'timestamp': '2025-09-30 22:39:04.062888', 'step': 21713, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:04.107057', 'step': 21713, 'epoch': 3} {'type': 'loss', 'content': 0.07225488871335983, 'timestamp': '2025-09-30 22:39:04.122498', 'step': 21714, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:04.166314', 'step': 21714, 'epoch': 3} {'type': 'loss', 'content': 0.041497815400362015, 'timestamp': '2025-09-30 22:39:04.170680', 'step': 21715, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.208727', 'step': 21715, 'epoch': 3} {'type': 'loss', 'content': 0.10338890552520752, 'timestamp': '2025-09-30 22:39:04.236047', 'step': 21716, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:04.280819', 'step': 21716, 'epoch': 3} {'type': 'loss', 'content': 0.058277834206819534, 'timestamp': '2025-09-30 22:39:04.299364', 'step': 21717, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.342286', 'step': 21717, 'epoch': 3} {'type': 'loss', 'content': 0.11074648797512054, 'timestamp': '2025-09-30 22:39:04.346418', 'step': 21718, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.379157', 'step': 21718, 'epoch': 3} {'type': 'loss', 'content': 0.10579149425029755, 'timestamp': '2025-09-30 22:39:04.385803', 'step': 21719, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:04.430935', 'step': 21719, 'epoch': 3} {'type': 'loss', 'content': 0.09231643378734589, 'timestamp': '2025-09-30 22:39:04.460623', 'step': 21720, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.492862', 'step': 21720, 'epoch': 3} {'type': 'loss', 'content': 0.09751515090465546, 'timestamp': '2025-09-30 22:39:04.497373', 'step': 21721, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:04.530050', 'step': 21721, 'epoch': 3} {'type': 'loss', 'content': 0.17013400793075562, 'timestamp': '2025-09-30 22:39:04.540960', 'step': 21722, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.579729', 'step': 21722, 'epoch': 3} {'type': 'loss', 'content': 0.17459014058113098, 'timestamp': '2025-09-30 22:39:04.582941', 'step': 21723, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.614474', 'step': 21723, 'epoch': 3} {'type': 'loss', 'content': 0.05060478299856186, 'timestamp': '2025-09-30 22:39:04.641315', 'step': 21724, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:04.686913', 'step': 21724, 'epoch': 3} {'type': 'loss', 'content': 0.1247493177652359, 'timestamp': '2025-09-30 22:39:04.693987', 'step': 21725, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:04.728640', 'step': 21725, 'epoch': 3} {'type': 'loss', 'content': 0.06462797522544861, 'timestamp': '2025-09-30 22:39:04.732696', 'step': 21726, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:04.763678', 'step': 21726, 'epoch': 3} {'type': 'loss', 'content': 0.027058225125074387, 'timestamp': '2025-09-30 22:39:04.772873', 'step': 21727, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:04.804132', 'step': 21727, 'epoch': 3} {'type': 'loss', 'content': 0.051487911492586136, 'timestamp': '2025-09-30 22:39:04.829348', 'step': 21728, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:04.873028', 'step': 21728, 'epoch': 3} {'type': 'loss', 'content': 0.08221728354692459, 'timestamp': '2025-09-30 22:39:04.877112', 'step': 21729, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:04.912965', 'step': 21729, 'epoch': 3} {'type': 'loss', 'content': 0.06653658300638199, 'timestamp': '2025-09-30 22:39:04.916419', 'step': 21730, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:04.949151', 'step': 21730, 'epoch': 3} {'type': 'loss', 'content': 0.02324063703417778, 'timestamp': '2025-09-30 22:39:04.954590', 'step': 21731, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:04.986667', 'step': 21731, 'epoch': 3} {'type': 'loss', 'content': 0.11083132028579712, 'timestamp': '2025-09-30 22:39:05.013467', 'step': 21732, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.061329', 'step': 21732, 'epoch': 3} {'type': 'loss', 'content': 0.029186004772782326, 'timestamp': '2025-09-30 22:39:05.077756', 'step': 21733, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.125441', 'step': 21733, 'epoch': 3} {'type': 'loss', 'content': 0.024448929354548454, 'timestamp': '2025-09-30 22:39:05.130935', 'step': 21734, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.165719', 'step': 21734, 'epoch': 3} {'type': 'loss', 'content': 0.040844421833753586, 'timestamp': '2025-09-30 22:39:05.181289', 'step': 21735, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.232161', 'step': 21735, 'epoch': 3} {'type': 'loss', 'content': 0.06802112609148026, 'timestamp': '2025-09-30 22:39:05.275110', 'step': 21736, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.324476', 'step': 21736, 'epoch': 3} {'type': 'loss', 'content': 0.04591061547398567, 'timestamp': '2025-09-30 22:39:05.364376', 'step': 21737, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.406688', 'step': 21737, 'epoch': 3} {'type': 'loss', 'content': 0.05209851264953613, 'timestamp': '2025-09-30 22:39:05.425547', 'step': 21738, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:05.462541', 'step': 21738, 'epoch': 3} {'type': 'loss', 'content': 0.0546787865459919, 'timestamp': '2025-09-30 22:39:05.473471', 'step': 21739, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.543626', 'step': 21739, 'epoch': 3} {'type': 'loss', 'content': 0.05406716465950012, 'timestamp': '2025-09-30 22:39:05.580714', 'step': 21740, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:05.631975', 'step': 21740, 'epoch': 3} {'type': 'loss', 'content': 0.08731815963983536, 'timestamp': '2025-09-30 22:39:05.646782', 'step': 21741, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:05.679650', 'step': 21741, 'epoch': 3} {'type': 'loss', 'content': 0.09798495471477509, 'timestamp': '2025-09-30 22:39:05.693860', 'step': 21742, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:05.736869', 'step': 21742, 'epoch': 3} {'type': 'loss', 'content': 0.06767654418945312, 'timestamp': '2025-09-30 22:39:05.748514', 'step': 21743, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:05.800721', 'step': 21743, 'epoch': 3} {'type': 'loss', 'content': 0.10548961162567139, 'timestamp': '2025-09-30 22:39:05.840472', 'step': 21744, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:05.891910', 'step': 21744, 'epoch': 3} {'type': 'loss', 'content': 0.04965570196509361, 'timestamp': '2025-09-30 22:39:05.899758', 'step': 21745, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:05.964136', 'step': 21745, 'epoch': 3} {'type': 'loss', 'content': 0.1142437756061554, 'timestamp': '2025-09-30 22:39:05.983491', 'step': 21746, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:06.033209', 'step': 21746, 'epoch': 3} {'type': 'loss', 'content': 0.10575433075428009, 'timestamp': '2025-09-30 22:39:06.042858', 'step': 21747, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:06.080107', 'step': 21747, 'epoch': 3} {'type': 'loss', 'content': 0.05883202701807022, 'timestamp': '2025-09-30 22:39:06.113898', 'step': 21748, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:06.158865', 'step': 21748, 'epoch': 3} {'type': 'loss', 'content': 0.08007972687482834, 'timestamp': '2025-09-30 22:39:06.175387', 'step': 21749, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:06.216789', 'step': 21749, 'epoch': 3} {'type': 'loss', 'content': 0.08681242913007736, 'timestamp': '2025-09-30 22:39:06.237571', 'step': 21750, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:06.280731', 'step': 21750, 'epoch': 3} {'type': 'loss', 'content': 0.06443433463573456, 'timestamp': '2025-09-30 22:39:06.290833', 'step': 21751, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:06.346488', 'step': 21751, 'epoch': 3} {'type': 'loss', 'content': 0.05277775600552559, 'timestamp': '2025-09-30 22:39:06.379490', 'step': 21752, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:06.420833', 'step': 21752, 'epoch': 3} {'type': 'loss', 'content': 0.08450200408697128, 'timestamp': '2025-09-30 22:39:06.429605', 'step': 21753, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:06.469038', 'step': 21753, 'epoch': 3} {'type': 'loss', 'content': 0.07375986874103546, 'timestamp': '2025-09-30 22:39:06.475738', 'step': 21754, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:06.510033', 'step': 21754, 'epoch': 3} {'type': 'loss', 'content': 0.025223832577466965, 'timestamp': '2025-09-30 22:39:06.513454', 'step': 21755, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:06.545843', 'step': 21755, 'epoch': 3} {'type': 'loss', 'content': 0.05092836916446686, 'timestamp': '2025-09-30 22:39:06.571891', 'step': 21756, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:06.606047', 'step': 21756, 'epoch': 3} {'type': 'loss', 'content': 0.07467694580554962, 'timestamp': '2025-09-30 22:39:06.616899', 'step': 21757, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:06.648810', 'step': 21757, 'epoch': 3} {'type': 'loss', 'content': 0.12815099954605103, 'timestamp': '2025-09-30 22:39:06.652653', 'step': 21758, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:06.683703', 'step': 21758, 'epoch': 3} {'type': 'loss', 'content': 0.09095394611358643, 'timestamp': '2025-09-30 22:39:06.688736', 'step': 21759, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:06.724488', 'step': 21759, 'epoch': 3} {'type': 'loss', 'content': 0.03893611952662468, 'timestamp': '2025-09-30 22:39:06.750428', 'step': 21760, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:06.782016', 'step': 21760, 'epoch': 3} {'type': 'loss', 'content': 0.0896560549736023, 'timestamp': '2025-09-30 22:39:06.787240', 'step': 21761, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:06.817692', 'step': 21761, 'epoch': 3} {'type': 'loss', 'content': 0.01923629641532898, 'timestamp': '2025-09-30 22:39:06.821407', 'step': 21762, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:06.856104', 'step': 21762, 'epoch': 3} {'type': 'loss', 'content': 0.06249883770942688, 'timestamp': '2025-09-30 22:39:06.861416', 'step': 21763, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:06.894270', 'step': 21763, 'epoch': 3} {'type': 'loss', 'content': 0.07093153893947601, 'timestamp': '2025-09-30 22:39:06.919724', 'step': 21764, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:06.950534', 'step': 21764, 'epoch': 3} {'type': 'loss', 'content': 0.06416326761245728, 'timestamp': '2025-09-30 22:39:06.956044', 'step': 21765, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:06.988460', 'step': 21765, 'epoch': 3} {'type': 'loss', 'content': 0.040600091218948364, 'timestamp': '2025-09-30 22:39:07.002525', 'step': 21766, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:07.035151', 'step': 21766, 'epoch': 3} {'type': 'loss', 'content': 0.03281402587890625, 'timestamp': '2025-09-30 22:39:07.041015', 'step': 21767, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.073261', 'step': 21767, 'epoch': 3} {'type': 'loss', 'content': 0.07502830773591995, 'timestamp': '2025-09-30 22:39:07.100055', 'step': 21768, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.132730', 'step': 21768, 'epoch': 3} {'type': 'loss', 'content': 0.06123270094394684, 'timestamp': '2025-09-30 22:39:07.149435', 'step': 21769, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:07.188401', 'step': 21769, 'epoch': 3} {'type': 'loss', 'content': 0.033609334379434586, 'timestamp': '2025-09-30 22:39:07.193751', 'step': 21770, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.227924', 'step': 21770, 'epoch': 3} {'type': 'loss', 'content': 0.012965302914381027, 'timestamp': '2025-09-30 22:39:07.230588', 'step': 21771, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.267836', 'step': 21771, 'epoch': 3} {'type': 'loss', 'content': 0.029001817107200623, 'timestamp': '2025-09-30 22:39:07.292911', 'step': 21772, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.323987', 'step': 21772, 'epoch': 3} {'type': 'loss', 'content': 0.041204359382390976, 'timestamp': '2025-09-30 22:39:07.340836', 'step': 21773, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:07.374324', 'step': 21773, 'epoch': 3} {'type': 'loss', 'content': 0.04826672747731209, 'timestamp': '2025-09-30 22:39:07.379868', 'step': 21774, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.413631', 'step': 21774, 'epoch': 3} {'type': 'loss', 'content': 0.03980487212538719, 'timestamp': '2025-09-30 22:39:07.417715', 'step': 21775, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:07.448772', 'step': 21775, 'epoch': 3} {'type': 'loss', 'content': 0.035826701670885086, 'timestamp': '2025-09-30 22:39:07.474737', 'step': 21776, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:07.511670', 'step': 21776, 'epoch': 3} {'type': 'loss', 'content': 0.03336304798722267, 'timestamp': '2025-09-30 22:39:07.518362', 'step': 21777, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:07.551829', 'step': 21777, 'epoch': 3} {'type': 'loss', 'content': 0.04434383288025856, 'timestamp': '2025-09-30 22:39:07.554980', 'step': 21778, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:07.600261', 'step': 21778, 'epoch': 3} {'type': 'loss', 'content': 0.05543681979179382, 'timestamp': '2025-09-30 22:39:07.605630', 'step': 21779, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.649131', 'step': 21779, 'epoch': 3} {'type': 'loss', 'content': 0.0738479495048523, 'timestamp': '2025-09-30 22:39:07.674942', 'step': 21780, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:07.716813', 'step': 21780, 'epoch': 3} {'type': 'loss', 'content': 0.08836480975151062, 'timestamp': '2025-09-30 22:39:07.720360', 'step': 21781, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:07.759056', 'step': 21781, 'epoch': 3} {'type': 'loss', 'content': 0.03531324863433838, 'timestamp': '2025-09-30 22:39:07.764255', 'step': 21782, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:07.798163', 'step': 21782, 'epoch': 3} {'type': 'loss', 'content': 0.12506674230098724, 'timestamp': '2025-09-30 22:39:07.802623', 'step': 21783, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:07.841197', 'step': 21783, 'epoch': 3} {'type': 'loss', 'content': 0.11696101725101471, 'timestamp': '2025-09-30 22:39:07.865596', 'step': 21784, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.904783', 'step': 21784, 'epoch': 3} {'type': 'loss', 'content': 0.04415607452392578, 'timestamp': '2025-09-30 22:39:07.908843', 'step': 21785, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.941696', 'step': 21785, 'epoch': 3} {'type': 'loss', 'content': 0.04211565479636192, 'timestamp': '2025-09-30 22:39:07.946294', 'step': 21786, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:07.980338', 'step': 21786, 'epoch': 3} {'type': 'loss', 'content': 0.090752512216568, 'timestamp': '2025-09-30 22:39:07.984339', 'step': 21787, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.017021', 'step': 21787, 'epoch': 3} {'type': 'loss', 'content': 0.04645753279328346, 'timestamp': '2025-09-30 22:39:08.043503', 'step': 21788, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.081595', 'step': 21788, 'epoch': 3} {'type': 'loss', 'content': 0.10968256741762161, 'timestamp': '2025-09-30 22:39:08.086244', 'step': 21789, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.118270', 'step': 21789, 'epoch': 3} {'type': 'loss', 'content': 0.04154485836625099, 'timestamp': '2025-09-30 22:39:08.123126', 'step': 21790, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:08.156337', 'step': 21790, 'epoch': 3} {'type': 'loss', 'content': 0.079405777156353, 'timestamp': '2025-09-30 22:39:08.162046', 'step': 21791, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:08.195370', 'step': 21791, 'epoch': 3} {'type': 'loss', 'content': 0.10687227547168732, 'timestamp': '2025-09-30 22:39:08.220123', 'step': 21792, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.263234', 'step': 21792, 'epoch': 3} {'type': 'loss', 'content': 0.10193697363138199, 'timestamp': '2025-09-30 22:39:08.266906', 'step': 21793, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:08.300773', 'step': 21793, 'epoch': 3} {'type': 'loss', 'content': 0.10958121716976166, 'timestamp': '2025-09-30 22:39:08.304707', 'step': 21794, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:08.335953', 'step': 21794, 'epoch': 3} {'type': 'loss', 'content': 0.08701768517494202, 'timestamp': '2025-09-30 22:39:08.353420', 'step': 21795, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:08.390602', 'step': 21795, 'epoch': 3} {'type': 'loss', 'content': 0.04696791619062424, 'timestamp': '2025-09-30 22:39:08.416777', 'step': 21796, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:08.461944', 'step': 21796, 'epoch': 3} {'type': 'loss', 'content': 0.08855444192886353, 'timestamp': '2025-09-30 22:39:08.467685', 'step': 21797, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:08.502936', 'step': 21797, 'epoch': 3} {'type': 'loss', 'content': 0.046283427625894547, 'timestamp': '2025-09-30 22:39:08.507012', 'step': 21798, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.556330', 'step': 21798, 'epoch': 3} {'type': 'loss', 'content': 0.07075681537389755, 'timestamp': '2025-09-30 22:39:08.561200', 'step': 21799, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.602716', 'step': 21799, 'epoch': 3} {'type': 'loss', 'content': 0.01799904741346836, 'timestamp': '2025-09-30 22:39:08.631028', 'step': 21800, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:08.679710', 'step': 21800, 'epoch': 3} {'type': 'loss', 'content': 0.04921218007802963, 'timestamp': '2025-09-30 22:39:08.692555', 'step': 21801, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:08.724273', 'step': 21801, 'epoch': 3} {'type': 'loss', 'content': 0.1323237270116806, 'timestamp': '2025-09-30 22:39:08.728116', 'step': 21802, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:08.759494', 'step': 21802, 'epoch': 3} {'type': 'loss', 'content': 0.03341004252433777, 'timestamp': '2025-09-30 22:39:08.777423', 'step': 21803, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:08.816056', 'step': 21803, 'epoch': 3} {'type': 'loss', 'content': 0.10448339581489563, 'timestamp': '2025-09-30 22:39:08.840874', 'step': 21804, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.872995', 'step': 21804, 'epoch': 3} {'type': 'loss', 'content': 0.06556134670972824, 'timestamp': '2025-09-30 22:39:08.878719', 'step': 21805, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:08.912182', 'step': 21805, 'epoch': 3} {'type': 'loss', 'content': 0.07372134923934937, 'timestamp': '2025-09-30 22:39:08.919137', 'step': 21806, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:08.962033', 'step': 21806, 'epoch': 3} {'type': 'loss', 'content': 0.0239272378385067, 'timestamp': '2025-09-30 22:39:08.973634', 'step': 21807, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:09.008512', 'step': 21807, 'epoch': 3} {'type': 'loss', 'content': 0.032813746482133865, 'timestamp': '2025-09-30 22:39:09.040609', 'step': 21808, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.071070', 'step': 21808, 'epoch': 3} {'type': 'loss', 'content': 0.03355584666132927, 'timestamp': '2025-09-30 22:39:09.074670', 'step': 21809, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.106266', 'step': 21809, 'epoch': 3} {'type': 'loss', 'content': 0.043451838195323944, 'timestamp': '2025-09-30 22:39:09.121315', 'step': 21810, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:09.154379', 'step': 21810, 'epoch': 3} {'type': 'loss', 'content': 0.03618570789694786, 'timestamp': '2025-09-30 22:39:09.158921', 'step': 21811, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:09.190504', 'step': 21811, 'epoch': 3} {'type': 'loss', 'content': 0.08235100656747818, 'timestamp': '2025-09-30 22:39:09.223170', 'step': 21812, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.256054', 'step': 21812, 'epoch': 3} {'type': 'loss', 'content': 0.1335836797952652, 'timestamp': '2025-09-30 22:39:09.259675', 'step': 21813, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:09.293315', 'step': 21813, 'epoch': 3} {'type': 'loss', 'content': 0.0604885034263134, 'timestamp': '2025-09-30 22:39:09.296350', 'step': 21814, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:09.327503', 'step': 21814, 'epoch': 3} {'type': 'loss', 'content': 0.051125261932611465, 'timestamp': '2025-09-30 22:39:09.331950', 'step': 21815, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.373489', 'step': 21815, 'epoch': 3} {'type': 'loss', 'content': 0.08593746274709702, 'timestamp': '2025-09-30 22:39:09.400377', 'step': 21816, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:09.433160', 'step': 21816, 'epoch': 3} {'type': 'loss', 'content': 0.06669105589389801, 'timestamp': '2025-09-30 22:39:09.438717', 'step': 21817, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:09.471301', 'step': 21817, 'epoch': 3} {'type': 'loss', 'content': 0.09668811410665512, 'timestamp': '2025-09-30 22:39:09.490777', 'step': 21818, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.523266', 'step': 21818, 'epoch': 3} {'type': 'loss', 'content': 0.06741231679916382, 'timestamp': '2025-09-30 22:39:09.527906', 'step': 21819, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:09.561322', 'step': 21819, 'epoch': 3} {'type': 'loss', 'content': 0.13127703964710236, 'timestamp': '2025-09-30 22:39:09.588055', 'step': 21820, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:09.620986', 'step': 21820, 'epoch': 3} {'type': 'loss', 'content': 0.023708146065473557, 'timestamp': '2025-09-30 22:39:09.638640', 'step': 21821, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.687940', 'step': 21821, 'epoch': 3} {'type': 'loss', 'content': 0.12783394753932953, 'timestamp': '2025-09-30 22:39:09.692422', 'step': 21822, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:09.726768', 'step': 21822, 'epoch': 3} {'type': 'loss', 'content': 0.10959433019161224, 'timestamp': '2025-09-30 22:39:09.732651', 'step': 21823, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.765655', 'step': 21823, 'epoch': 3} {'type': 'loss', 'content': 0.04554217308759689, 'timestamp': '2025-09-30 22:39:09.792827', 'step': 21824, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:09.825157', 'step': 21824, 'epoch': 3} {'type': 'loss', 'content': 0.09288987517356873, 'timestamp': '2025-09-30 22:39:09.831714', 'step': 21825, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:09.865427', 'step': 21825, 'epoch': 3} {'type': 'loss', 'content': 0.08773215860128403, 'timestamp': '2025-09-30 22:39:09.870521', 'step': 21826, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:09.903831', 'step': 21826, 'epoch': 3} {'type': 'loss', 'content': 0.0597417913377285, 'timestamp': '2025-09-30 22:39:09.913841', 'step': 21827, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:09.947933', 'step': 21827, 'epoch': 3} {'type': 'loss', 'content': 0.04946181923151016, 'timestamp': '2025-09-30 22:39:09.976452', 'step': 21828, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.009840', 'step': 21828, 'epoch': 3} {'type': 'loss', 'content': 0.04169517010450363, 'timestamp': '2025-09-30 22:39:10.013932', 'step': 21829, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.046827', 'step': 21829, 'epoch': 3} {'type': 'loss', 'content': 0.13924364745616913, 'timestamp': '2025-09-30 22:39:10.051179', 'step': 21830, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.099544', 'step': 21830, 'epoch': 3} {'type': 'loss', 'content': 0.04484966769814491, 'timestamp': '2025-09-30 22:39:10.105633', 'step': 21831, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.138322', 'step': 21831, 'epoch': 3} {'type': 'loss', 'content': 0.06144183129072189, 'timestamp': '2025-09-30 22:39:10.166559', 'step': 21832, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.201955', 'step': 21832, 'epoch': 3} {'type': 'loss', 'content': 0.024118144065141678, 'timestamp': '2025-09-30 22:39:10.206651', 'step': 21833, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.248204', 'step': 21833, 'epoch': 3} {'type': 'loss', 'content': 0.021839361637830734, 'timestamp': '2025-09-30 22:39:10.253604', 'step': 21834, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.287944', 'step': 21834, 'epoch': 3} {'type': 'loss', 'content': 0.09311319142580032, 'timestamp': '2025-09-30 22:39:10.293705', 'step': 21835, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.328759', 'step': 21835, 'epoch': 3} {'type': 'loss', 'content': 0.058817602694034576, 'timestamp': '2025-09-30 22:39:10.356837', 'step': 21836, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:10.404345', 'step': 21836, 'epoch': 3} {'type': 'loss', 'content': 0.05528522655367851, 'timestamp': '2025-09-30 22:39:10.425606', 'step': 21837, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:10.475157', 'step': 21837, 'epoch': 3} {'type': 'loss', 'content': 0.007386920973658562, 'timestamp': '2025-09-30 22:39:10.481175', 'step': 21838, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:10.513512', 'step': 21838, 'epoch': 3} {'type': 'loss', 'content': 0.07985217124223709, 'timestamp': '2025-09-30 22:39:10.518415', 'step': 21839, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.551326', 'step': 21839, 'epoch': 3} {'type': 'loss', 'content': 0.049053583294153214, 'timestamp': '2025-09-30 22:39:10.578128', 'step': 21840, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:10.610836', 'step': 21840, 'epoch': 3} {'type': 'loss', 'content': 0.00951148010790348, 'timestamp': '2025-09-30 22:39:10.616972', 'step': 21841, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:10.650166', 'step': 21841, 'epoch': 3} {'type': 'loss', 'content': 0.06611843407154083, 'timestamp': '2025-09-30 22:39:10.654675', 'step': 21842, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:10.689800', 'step': 21842, 'epoch': 3} {'type': 'loss', 'content': 0.06963944435119629, 'timestamp': '2025-09-30 22:39:10.709644', 'step': 21843, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.742809', 'step': 21843, 'epoch': 3} {'type': 'loss', 'content': 0.06626692414283752, 'timestamp': '2025-09-30 22:39:10.768815', 'step': 21844, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.803092', 'step': 21844, 'epoch': 3} {'type': 'loss', 'content': 0.05925901606678963, 'timestamp': '2025-09-30 22:39:10.806663', 'step': 21845, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:10.841711', 'step': 21845, 'epoch': 3} {'type': 'loss', 'content': 0.11029818654060364, 'timestamp': '2025-09-30 22:39:10.845927', 'step': 21846, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:10.879502', 'step': 21846, 'epoch': 3} {'type': 'loss', 'content': 0.04537217691540718, 'timestamp': '2025-09-30 22:39:10.885389', 'step': 21847, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.919073', 'step': 21847, 'epoch': 3} {'type': 'loss', 'content': 0.07090406119823456, 'timestamp': '2025-09-30 22:39:10.945822', 'step': 21848, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:10.991938', 'step': 21848, 'epoch': 3} {'type': 'loss', 'content': 0.10228094458580017, 'timestamp': '2025-09-30 22:39:10.997703', 'step': 21849, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:11.031047', 'step': 21849, 'epoch': 3} {'type': 'loss', 'content': 0.06826193630695343, 'timestamp': '2025-09-30 22:39:11.035931', 'step': 21850, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:11.070077', 'step': 21850, 'epoch': 3} {'type': 'loss', 'content': 0.02471681870520115, 'timestamp': '2025-09-30 22:39:11.074220', 'step': 21851, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:11.108163', 'step': 21851, 'epoch': 3} {'type': 'loss', 'content': 0.007872569374740124, 'timestamp': '2025-09-30 22:39:11.133230', 'step': 21852, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:11.166620', 'step': 21852, 'epoch': 3} {'type': 'loss', 'content': 0.023211078718304634, 'timestamp': '2025-09-30 22:39:11.170768', 'step': 21853, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:11.204277', 'step': 21853, 'epoch': 3} {'type': 'loss', 'content': 0.08074836432933807, 'timestamp': '2025-09-30 22:39:11.208859', 'step': 21854, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:11.250625', 'step': 21854, 'epoch': 3} {'type': 'loss', 'content': 0.04802386090159416, 'timestamp': '2025-09-30 22:39:11.255790', 'step': 21855, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:11.288996', 'step': 21855, 'epoch': 3} {'type': 'loss', 'content': 0.07945995777845383, 'timestamp': '2025-09-30 22:39:11.315600', 'step': 21856, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.348474', 'step': 21856, 'epoch': 3} {'type': 'loss', 'content': 0.014645281247794628, 'timestamp': '2025-09-30 22:39:11.352495', 'step': 21857, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:11.387022', 'step': 21857, 'epoch': 3} {'type': 'loss', 'content': 0.04221276938915253, 'timestamp': '2025-09-30 22:39:11.391521', 'step': 21858, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:39:11.425322', 'step': 21858, 'epoch': 3} {'type': 'loss', 'content': 0.04284869506955147, 'timestamp': '2025-09-30 22:39:11.444496', 'step': 21859, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.477122', 'step': 21859, 'epoch': 3} {'type': 'loss', 'content': 0.05551881343126297, 'timestamp': '2025-09-30 22:39:11.501473', 'step': 21860, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.539362', 'step': 21860, 'epoch': 3} {'type': 'loss', 'content': 0.11569403856992722, 'timestamp': '2025-09-30 22:39:11.543920', 'step': 21861, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.576371', 'step': 21861, 'epoch': 3} {'type': 'loss', 'content': 0.06262476742267609, 'timestamp': '2025-09-30 22:39:11.580210', 'step': 21862, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.615779', 'step': 21862, 'epoch': 3} {'type': 'loss', 'content': 0.10093805938959122, 'timestamp': '2025-09-30 22:39:11.631174', 'step': 21863, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:11.663699', 'step': 21863, 'epoch': 3} {'type': 'loss', 'content': 0.035209428519010544, 'timestamp': '2025-09-30 22:39:11.690615', 'step': 21864, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.726211', 'step': 21864, 'epoch': 3} {'type': 'loss', 'content': 0.022944284602999687, 'timestamp': '2025-09-30 22:39:11.730755', 'step': 21865, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.763849', 'step': 21865, 'epoch': 3} {'type': 'loss', 'content': 0.06202727556228638, 'timestamp': '2025-09-30 22:39:11.770598', 'step': 21866, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.803318', 'step': 21866, 'epoch': 3} {'type': 'loss', 'content': 0.05112563073635101, 'timestamp': '2025-09-30 22:39:11.807405', 'step': 21867, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:11.841318', 'step': 21867, 'epoch': 3} {'type': 'loss', 'content': 0.0052845715545117855, 'timestamp': '2025-09-30 22:39:11.870453', 'step': 21868, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:11.913810', 'step': 21868, 'epoch': 3} {'type': 'loss', 'content': 0.07055382430553436, 'timestamp': '2025-09-30 22:39:11.923488', 'step': 21869, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:11.959323', 'step': 21869, 'epoch': 3} {'type': 'loss', 'content': 0.10687435418367386, 'timestamp': '2025-09-30 22:39:11.962515', 'step': 21870, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:12.003474', 'step': 21870, 'epoch': 3} {'type': 'loss', 'content': 0.06839574128389359, 'timestamp': '2025-09-30 22:39:12.006343', 'step': 21871, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.037910', 'step': 21871, 'epoch': 3} {'type': 'loss', 'content': 0.08879555016756058, 'timestamp': '2025-09-30 22:39:12.062702', 'step': 21872, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:12.095415', 'step': 21872, 'epoch': 3} {'type': 'loss', 'content': 0.08578842133283615, 'timestamp': '2025-09-30 22:39:12.112313', 'step': 21873, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.153075', 'step': 21873, 'epoch': 3} {'type': 'loss', 'content': 0.04944618418812752, 'timestamp': '2025-09-30 22:39:12.161084', 'step': 21874, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:12.197775', 'step': 21874, 'epoch': 3} {'type': 'loss', 'content': 0.1410418599843979, 'timestamp': '2025-09-30 22:39:12.201800', 'step': 21875, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.232731', 'step': 21875, 'epoch': 3} {'type': 'loss', 'content': 0.02794046700000763, 'timestamp': '2025-09-30 22:39:12.260605', 'step': 21876, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:12.292488', 'step': 21876, 'epoch': 3} {'type': 'loss', 'content': 0.14000153541564941, 'timestamp': '2025-09-30 22:39:12.297507', 'step': 21877, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.340513', 'step': 21877, 'epoch': 3} {'type': 'loss', 'content': 0.08955522626638412, 'timestamp': '2025-09-30 22:39:12.343927', 'step': 21878, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:12.390318', 'step': 21878, 'epoch': 3} {'type': 'loss', 'content': 0.05868787318468094, 'timestamp': '2025-09-30 22:39:12.395103', 'step': 21879, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:12.428458', 'step': 21879, 'epoch': 3} {'type': 'loss', 'content': 0.06192412227392197, 'timestamp': '2025-09-30 22:39:12.454819', 'step': 21880, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.487437', 'step': 21880, 'epoch': 3} {'type': 'loss', 'content': 0.09564002603292465, 'timestamp': '2025-09-30 22:39:12.498857', 'step': 21881, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:12.534612', 'step': 21881, 'epoch': 3} {'type': 'loss', 'content': 0.10809478908777237, 'timestamp': '2025-09-30 22:39:12.556418', 'step': 21882, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.590783', 'step': 21882, 'epoch': 3} {'type': 'loss', 'content': 0.07806102931499481, 'timestamp': '2025-09-30 22:39:12.614578', 'step': 21883, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.659057', 'step': 21883, 'epoch': 3} {'type': 'loss', 'content': 0.025116581469774246, 'timestamp': '2025-09-30 22:39:12.686330', 'step': 21884, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:12.730466', 'step': 21884, 'epoch': 3} {'type': 'loss', 'content': 0.11410059034824371, 'timestamp': '2025-09-30 22:39:12.734000', 'step': 21885, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.767482', 'step': 21885, 'epoch': 3} {'type': 'loss', 'content': 0.020456958562135696, 'timestamp': '2025-09-30 22:39:12.771861', 'step': 21886, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:12.804084', 'step': 21886, 'epoch': 3} {'type': 'loss', 'content': 0.028687290847301483, 'timestamp': '2025-09-30 22:39:12.816810', 'step': 21887, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:12.858669', 'step': 21887, 'epoch': 3} {'type': 'loss', 'content': 0.06654995679855347, 'timestamp': '2025-09-30 22:39:12.884756', 'step': 21888, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:12.915904', 'step': 21888, 'epoch': 3} {'type': 'loss', 'content': 0.017912454903125763, 'timestamp': '2025-09-30 22:39:12.919329', 'step': 21889, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:12.961533', 'step': 21889, 'epoch': 3} {'type': 'loss', 'content': 0.046810418367385864, 'timestamp': '2025-09-30 22:39:12.967669', 'step': 21890, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:13.018236', 'step': 21890, 'epoch': 3} {'type': 'loss', 'content': 0.08827083557844162, 'timestamp': '2025-09-30 22:39:13.025121', 'step': 21891, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:13.060729', 'step': 21891, 'epoch': 3} {'type': 'loss', 'content': 0.06971829384565353, 'timestamp': '2025-09-30 22:39:13.087303', 'step': 21892, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:13.120167', 'step': 21892, 'epoch': 3} {'type': 'loss', 'content': 0.12678557634353638, 'timestamp': '2025-09-30 22:39:13.123836', 'step': 21893, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:13.157796', 'step': 21893, 'epoch': 3} {'type': 'loss', 'content': 0.07512728124856949, 'timestamp': '2025-09-30 22:39:13.161424', 'step': 21894, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:13.195107', 'step': 21894, 'epoch': 3} {'type': 'loss', 'content': 0.10568196326494217, 'timestamp': '2025-09-30 22:39:13.201012', 'step': 21895, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:13.236390', 'step': 21895, 'epoch': 3} {'type': 'loss', 'content': 0.035964809358119965, 'timestamp': '2025-09-30 22:39:13.261236', 'step': 21896, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:13.297425', 'step': 21896, 'epoch': 3} {'type': 'loss', 'content': 0.0710892304778099, 'timestamp': '2025-09-30 22:39:13.304715', 'step': 21897, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:13.354558', 'step': 21897, 'epoch': 3} {'type': 'loss', 'content': 0.1405976116657257, 'timestamp': '2025-09-30 22:39:13.366038', 'step': 21898, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:13.403726', 'step': 21898, 'epoch': 3} {'type': 'loss', 'content': 0.04181796312332153, 'timestamp': '2025-09-30 22:39:13.414377', 'step': 21899, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:13.459954', 'step': 21899, 'epoch': 3} {'type': 'loss', 'content': 0.10390114039182663, 'timestamp': '2025-09-30 22:39:13.496276', 'step': 21900, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:13.550837', 'step': 21900, 'epoch': 3} {'type': 'loss', 'content': 0.10062002390623093, 'timestamp': '2025-09-30 22:39:13.579021', 'step': 21901, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:13.614278', 'step': 21901, 'epoch': 3} {'type': 'loss', 'content': 0.017920486629009247, 'timestamp': '2025-09-30 22:39:13.621296', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:39:21.441374', 'step': 21902, 'epoch': 3} {'type': 'pplx', 'content': 9594.087457166603, 'timestamp': '2025-09-30 22:39:21.445661', 'step': 21902, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:21.488525', 'step': 21902, 'epoch': 3} {'type': 'loss', 'content': 0.10131023079156876, 'timestamp': '2025-09-30 22:39:21.496038', 'step': 21903, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:21.529307', 'step': 21903, 'epoch': 3} {'type': 'loss', 'content': 0.02784336730837822, 'timestamp': '2025-09-30 22:39:21.553549', 'step': 21904, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:21.588482', 'step': 21904, 'epoch': 3} {'type': 'loss', 'content': 0.03282928466796875, 'timestamp': '2025-09-30 22:39:21.600496', 'step': 21905, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:21.649308', 'step': 21905, 'epoch': 3} {'type': 'loss', 'content': 0.08698037266731262, 'timestamp': '2025-09-30 22:39:21.664955', 'step': 21906, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:21.716926', 'step': 21906, 'epoch': 3} {'type': 'loss', 'content': 0.09456352144479752, 'timestamp': '2025-09-30 22:39:21.730423', 'step': 21907, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:21.769446', 'step': 21907, 'epoch': 3} {'type': 'loss', 'content': 0.06768333166837692, 'timestamp': '2025-09-30 22:39:21.794905', 'step': 21908, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:21.848458', 'step': 21908, 'epoch': 3} {'type': 'loss', 'content': 0.02916155569255352, 'timestamp': '2025-09-30 22:39:21.853986', 'step': 21909, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:21.899935', 'step': 21909, 'epoch': 3} {'type': 'loss', 'content': 0.051600534468889236, 'timestamp': '2025-09-30 22:39:21.905770', 'step': 21910, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:21.944341', 'step': 21910, 'epoch': 3} {'type': 'loss', 'content': 0.07901660352945328, 'timestamp': '2025-09-30 22:39:21.947778', 'step': 21911, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:21.992011', 'step': 21911, 'epoch': 3} {'type': 'loss', 'content': 0.0708051323890686, 'timestamp': '2025-09-30 22:39:22.019282', 'step': 21912, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:22.078830', 'step': 21912, 'epoch': 3} {'type': 'loss', 'content': 0.09732605516910553, 'timestamp': '2025-09-30 22:39:22.084349', 'step': 21913, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:22.131584', 'step': 21913, 'epoch': 3} {'type': 'loss', 'content': 0.026574207469820976, 'timestamp': '2025-09-30 22:39:22.136121', 'step': 21914, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:22.180811', 'step': 21914, 'epoch': 3} {'type': 'loss', 'content': 0.1524803638458252, 'timestamp': '2025-09-30 22:39:22.189275', 'step': 21915, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.226368', 'step': 21915, 'epoch': 3} {'type': 'loss', 'content': 0.0549115389585495, 'timestamp': '2025-09-30 22:39:22.256804', 'step': 21916, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:22.301865', 'step': 21916, 'epoch': 3} {'type': 'loss', 'content': 0.04033074527978897, 'timestamp': '2025-09-30 22:39:22.304499', 'step': 21917, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:22.338783', 'step': 21917, 'epoch': 3} {'type': 'loss', 'content': 0.09579623490571976, 'timestamp': '2025-09-30 22:39:22.342900', 'step': 21918, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:22.379781', 'step': 21918, 'epoch': 3} {'type': 'loss', 'content': 0.042258989065885544, 'timestamp': '2025-09-30 22:39:22.393469', 'step': 21919, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.430353', 'step': 21919, 'epoch': 3} {'type': 'loss', 'content': 0.0527242049574852, 'timestamp': '2025-09-30 22:39:22.456901', 'step': 21920, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.491055', 'step': 21920, 'epoch': 3} {'type': 'loss', 'content': 0.008045769296586514, 'timestamp': '2025-09-30 22:39:22.504680', 'step': 21921, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:22.552797', 'step': 21921, 'epoch': 3} {'type': 'loss', 'content': 0.059114594012498856, 'timestamp': '2025-09-30 22:39:22.556021', 'step': 21922, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:22.621573', 'step': 21922, 'epoch': 3} {'type': 'loss', 'content': 0.03607657924294472, 'timestamp': '2025-09-30 22:39:22.624824', 'step': 21923, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:22.658387', 'step': 21923, 'epoch': 3} {'type': 'loss', 'content': 0.07599608600139618, 'timestamp': '2025-09-30 22:39:22.684256', 'step': 21924, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.724026', 'step': 21924, 'epoch': 3} {'type': 'loss', 'content': 0.0581619068980217, 'timestamp': '2025-09-30 22:39:22.728311', 'step': 21925, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.761498', 'step': 21925, 'epoch': 3} {'type': 'loss', 'content': 0.055296946316957474, 'timestamp': '2025-09-30 22:39:22.768902', 'step': 21926, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.802852', 'step': 21926, 'epoch': 3} {'type': 'loss', 'content': 0.033978648483753204, 'timestamp': '2025-09-30 22:39:22.810055', 'step': 21927, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:22.844424', 'step': 21927, 'epoch': 3} {'type': 'loss', 'content': 0.042162273079156876, 'timestamp': '2025-09-30 22:39:22.869739', 'step': 21928, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:22.901925', 'step': 21928, 'epoch': 3} {'type': 'loss', 'content': 0.11298204213380814, 'timestamp': '2025-09-30 22:39:22.904981', 'step': 21929, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:22.935767', 'step': 21929, 'epoch': 3} {'type': 'loss', 'content': 0.02300095744431019, 'timestamp': '2025-09-30 22:39:22.939920', 'step': 21930, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:22.972168', 'step': 21930, 'epoch': 3} {'type': 'loss', 'content': 0.08045343309640884, 'timestamp': '2025-09-30 22:39:22.976232', 'step': 21931, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.008128', 'step': 21931, 'epoch': 3} {'type': 'loss', 'content': 0.09708461165428162, 'timestamp': '2025-09-30 22:39:23.035071', 'step': 21932, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:23.066991', 'step': 21932, 'epoch': 3} {'type': 'loss', 'content': 0.08354195952415466, 'timestamp': '2025-09-30 22:39:23.071925', 'step': 21933, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:23.105317', 'step': 21933, 'epoch': 3} {'type': 'loss', 'content': 0.10046856105327606, 'timestamp': '2025-09-30 22:39:23.118139', 'step': 21934, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:23.155438', 'step': 21934, 'epoch': 3} {'type': 'loss', 'content': 0.0514531172811985, 'timestamp': '2025-09-30 22:39:23.162522', 'step': 21935, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:23.195830', 'step': 21935, 'epoch': 3} {'type': 'loss', 'content': 0.075025275349617, 'timestamp': '2025-09-30 22:39:23.222729', 'step': 21936, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.265271', 'step': 21936, 'epoch': 3} {'type': 'loss', 'content': 0.0887593924999237, 'timestamp': '2025-09-30 22:39:23.268435', 'step': 21937, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.309762', 'step': 21937, 'epoch': 3} {'type': 'loss', 'content': 0.0874645784497261, 'timestamp': '2025-09-30 22:39:23.313789', 'step': 21938, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.346583', 'step': 21938, 'epoch': 3} {'type': 'loss', 'content': 0.0690249651670456, 'timestamp': '2025-09-30 22:39:23.352267', 'step': 21939, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:23.385890', 'step': 21939, 'epoch': 3} {'type': 'loss', 'content': 0.015059204772114754, 'timestamp': '2025-09-30 22:39:23.425446', 'step': 21940, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:23.458844', 'step': 21940, 'epoch': 3} {'type': 'loss', 'content': 0.09529503434896469, 'timestamp': '2025-09-30 22:39:23.463685', 'step': 21941, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:23.497082', 'step': 21941, 'epoch': 3} {'type': 'loss', 'content': 0.0527040921151638, 'timestamp': '2025-09-30 22:39:23.501848', 'step': 21942, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:23.534668', 'step': 21942, 'epoch': 3} {'type': 'loss', 'content': 0.04662203788757324, 'timestamp': '2025-09-30 22:39:23.538107', 'step': 21943, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.571486', 'step': 21943, 'epoch': 3} {'type': 'loss', 'content': 0.029398655518889427, 'timestamp': '2025-09-30 22:39:23.598873', 'step': 21944, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.631818', 'step': 21944, 'epoch': 3} {'type': 'loss', 'content': 0.09434230625629425, 'timestamp': '2025-09-30 22:39:23.635488', 'step': 21945, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:23.676774', 'step': 21945, 'epoch': 3} {'type': 'loss', 'content': 0.04614318907260895, 'timestamp': '2025-09-30 22:39:23.682632', 'step': 21946, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.715182', 'step': 21946, 'epoch': 3} {'type': 'loss', 'content': 0.06316958367824554, 'timestamp': '2025-09-30 22:39:23.719146', 'step': 21947, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:23.752329', 'step': 21947, 'epoch': 3} {'type': 'loss', 'content': 0.04790628328919411, 'timestamp': '2025-09-30 22:39:23.779861', 'step': 21948, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:23.823585', 'step': 21948, 'epoch': 3} {'type': 'loss', 'content': 0.031015658751130104, 'timestamp': '2025-09-30 22:39:23.834600', 'step': 21949, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:23.875465', 'step': 21949, 'epoch': 3} {'type': 'loss', 'content': 0.07382947206497192, 'timestamp': '2025-09-30 22:39:23.879725', 'step': 21950, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:23.911885', 'step': 21950, 'epoch': 3} {'type': 'loss', 'content': 0.04266833886504173, 'timestamp': '2025-09-30 22:39:23.915580', 'step': 21951, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:23.960928', 'step': 21951, 'epoch': 3} {'type': 'loss', 'content': 0.03892495110630989, 'timestamp': '2025-09-30 22:39:23.994923', 'step': 21952, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:24.028449', 'step': 21952, 'epoch': 3} {'type': 'loss', 'content': 0.035001304000616074, 'timestamp': '2025-09-30 22:39:24.031692', 'step': 21953, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.066394', 'step': 21953, 'epoch': 3} {'type': 'loss', 'content': 0.060049496591091156, 'timestamp': '2025-09-30 22:39:24.069580', 'step': 21954, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:24.108232', 'step': 21954, 'epoch': 3} {'type': 'loss', 'content': 0.04090215265750885, 'timestamp': '2025-09-30 22:39:24.112112', 'step': 21955, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:24.154272', 'step': 21955, 'epoch': 3} {'type': 'loss', 'content': 0.03342907503247261, 'timestamp': '2025-09-30 22:39:24.188117', 'step': 21956, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:24.223511', 'step': 21956, 'epoch': 3} {'type': 'loss', 'content': 0.13680829107761383, 'timestamp': '2025-09-30 22:39:24.226413', 'step': 21957, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.257509', 'step': 21957, 'epoch': 3} {'type': 'loss', 'content': 0.041234515607357025, 'timestamp': '2025-09-30 22:39:24.261998', 'step': 21958, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:24.295514', 'step': 21958, 'epoch': 3} {'type': 'loss', 'content': 0.08440102636814117, 'timestamp': '2025-09-30 22:39:24.307782', 'step': 21959, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.348288', 'step': 21959, 'epoch': 3} {'type': 'loss', 'content': 0.08138921111822128, 'timestamp': '2025-09-30 22:39:24.374570', 'step': 21960, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.406168', 'step': 21960, 'epoch': 3} {'type': 'loss', 'content': 0.05754616856575012, 'timestamp': '2025-09-30 22:39:24.410416', 'step': 21961, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.443574', 'step': 21961, 'epoch': 3} {'type': 'loss', 'content': 0.03925834968686104, 'timestamp': '2025-09-30 22:39:24.458120', 'step': 21962, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.508702', 'step': 21962, 'epoch': 3} {'type': 'loss', 'content': 0.019681282341480255, 'timestamp': '2025-09-30 22:39:24.515699', 'step': 21963, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:24.548279', 'step': 21963, 'epoch': 3} {'type': 'loss', 'content': 0.050790153443813324, 'timestamp': '2025-09-30 22:39:24.587269', 'step': 21964, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:24.618519', 'step': 21964, 'epoch': 3} {'type': 'loss', 'content': 0.0250210203230381, 'timestamp': '2025-09-30 22:39:24.622792', 'step': 21965, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.655186', 'step': 21965, 'epoch': 3} {'type': 'loss', 'content': 0.06687799841165543, 'timestamp': '2025-09-30 22:39:24.661240', 'step': 21966, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.694165', 'step': 21966, 'epoch': 3} {'type': 'loss', 'content': 0.04292742535471916, 'timestamp': '2025-09-30 22:39:24.699553', 'step': 21967, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.732860', 'step': 21967, 'epoch': 3} {'type': 'loss', 'content': 0.07915671169757843, 'timestamp': '2025-09-30 22:39:24.759592', 'step': 21968, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.790493', 'step': 21968, 'epoch': 3} {'type': 'loss', 'content': 0.04899755120277405, 'timestamp': '2025-09-30 22:39:24.795561', 'step': 21969, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.828515', 'step': 21969, 'epoch': 3} {'type': 'loss', 'content': 0.03508230671286583, 'timestamp': '2025-09-30 22:39:24.831735', 'step': 21970, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.864745', 'step': 21970, 'epoch': 3} {'type': 'loss', 'content': 0.08635549247264862, 'timestamp': '2025-09-30 22:39:24.877673', 'step': 21971, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:24.919181', 'step': 21971, 'epoch': 3} {'type': 'loss', 'content': 0.09631799906492233, 'timestamp': '2025-09-30 22:39:24.953213', 'step': 21972, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:24.983866', 'step': 21972, 'epoch': 3} {'type': 'loss', 'content': 0.10635612905025482, 'timestamp': '2025-09-30 22:39:24.988677', 'step': 21973, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:25.019482', 'step': 21973, 'epoch': 3} {'type': 'loss', 'content': 0.023528210818767548, 'timestamp': '2025-09-30 22:39:25.022613', 'step': 21974, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:25.055831', 'step': 21974, 'epoch': 3} {'type': 'loss', 'content': 0.09002416580915451, 'timestamp': '2025-09-30 22:39:25.069480', 'step': 21975, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:25.107714', 'step': 21975, 'epoch': 3} {'type': 'loss', 'content': 0.018261494114995003, 'timestamp': '2025-09-30 22:39:25.143108', 'step': 21976, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:25.185502', 'step': 21976, 'epoch': 3} {'type': 'loss', 'content': 0.09316445142030716, 'timestamp': '2025-09-30 22:39:25.189931', 'step': 21977, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:25.223838', 'step': 21977, 'epoch': 3} {'type': 'loss', 'content': 0.09439008682966232, 'timestamp': '2025-09-30 22:39:25.228763', 'step': 21978, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:25.262319', 'step': 21978, 'epoch': 3} {'type': 'loss', 'content': 0.12130341678857803, 'timestamp': '2025-09-30 22:39:25.266750', 'step': 21979, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:25.312858', 'step': 21979, 'epoch': 3} {'type': 'loss', 'content': 0.1507239043712616, 'timestamp': '2025-09-30 22:39:25.339351', 'step': 21980, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:25.374607', 'step': 21980, 'epoch': 3} {'type': 'loss', 'content': 0.039598725736141205, 'timestamp': '2025-09-30 22:39:25.377239', 'step': 21981, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:25.410330', 'step': 21981, 'epoch': 3} {'type': 'loss', 'content': 0.13106730580329895, 'timestamp': '2025-09-30 22:39:25.430329', 'step': 21982, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:25.463642', 'step': 21982, 'epoch': 3} {'type': 'loss', 'content': 0.12088494747877121, 'timestamp': '2025-09-30 22:39:25.478662', 'step': 21983, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:25.523505', 'step': 21983, 'epoch': 3} {'type': 'loss', 'content': 0.09276736527681351, 'timestamp': '2025-09-30 22:39:25.548880', 'step': 21984, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:25.580355', 'step': 21984, 'epoch': 3} {'type': 'loss', 'content': 0.06795670837163925, 'timestamp': '2025-09-30 22:39:25.596215', 'step': 21985, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:25.628327', 'step': 21985, 'epoch': 3} {'type': 'loss', 'content': 0.114909827709198, 'timestamp': '2025-09-30 22:39:25.633996', 'step': 21986, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:25.665510', 'step': 21986, 'epoch': 3} {'type': 'loss', 'content': 0.12612663209438324, 'timestamp': '2025-09-30 22:39:25.677145', 'step': 21987, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:25.717454', 'step': 21987, 'epoch': 3} {'type': 'loss', 'content': 0.012649345211684704, 'timestamp': '2025-09-30 22:39:25.753310', 'step': 21988, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:25.796316', 'step': 21988, 'epoch': 3} {'type': 'loss', 'content': 0.06732279807329178, 'timestamp': '2025-09-30 22:39:25.810670', 'step': 21989, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:25.840866', 'step': 21989, 'epoch': 3} {'type': 'loss', 'content': 0.04362433776259422, 'timestamp': '2025-09-30 22:39:25.844689', 'step': 21990, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:25.879771', 'step': 21990, 'epoch': 3} {'type': 'loss', 'content': 0.07643308490514755, 'timestamp': '2025-09-30 22:39:25.891862', 'step': 21991, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:25.932641', 'step': 21991, 'epoch': 3} {'type': 'loss', 'content': 0.056164439767599106, 'timestamp': '2025-09-30 22:39:25.966727', 'step': 21992, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:25.998590', 'step': 21992, 'epoch': 3} {'type': 'loss', 'content': 0.02710154838860035, 'timestamp': '2025-09-30 22:39:26.003282', 'step': 21993, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:26.051901', 'step': 21993, 'epoch': 3} {'type': 'loss', 'content': 0.08371016383171082, 'timestamp': '2025-09-30 22:39:26.064525', 'step': 21994, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:26.095524', 'step': 21994, 'epoch': 3} {'type': 'loss', 'content': 0.027270548045635223, 'timestamp': '2025-09-30 22:39:26.108711', 'step': 21995, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:26.149936', 'step': 21995, 'epoch': 3} {'type': 'loss', 'content': 0.057302720844745636, 'timestamp': '2025-09-30 22:39:26.176802', 'step': 21996, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:26.222647', 'step': 21996, 'epoch': 3} {'type': 'loss', 'content': 0.023736177012324333, 'timestamp': '2025-09-30 22:39:26.236233', 'step': 21997, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:26.270077', 'step': 21997, 'epoch': 3} {'type': 'loss', 'content': 0.12291881442070007, 'timestamp': '2025-09-30 22:39:26.283052', 'step': 21998, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:26.321175', 'step': 21998, 'epoch': 3} {'type': 'loss', 'content': 0.05313025042414665, 'timestamp': '2025-09-30 22:39:26.333396', 'step': 21999, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:26.373805', 'step': 21999, 'epoch': 3} {'type': 'loss', 'content': 0.04499198868870735, 'timestamp': '2025-09-30 22:39:26.408435', 'step': 22000, 'epoch': 3} {'type': 'info', 'content': 'Checkpoint saved at step 22000', 'timestamp': '2025-09-30 22:39:31.353658', 'step': 22000, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.387997', 'step': 22000, 'epoch': 3} {'type': 'loss', 'content': 0.079457126557827, 'timestamp': '2025-09-30 22:39:31.393383', 'step': 22001, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.426171', 'step': 22001, 'epoch': 3} {'type': 'loss', 'content': 0.03211701661348343, 'timestamp': '2025-09-30 22:39:31.440046', 'step': 22002, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:31.473413', 'step': 22002, 'epoch': 3} {'type': 'loss', 'content': 0.0743151307106018, 'timestamp': '2025-09-30 22:39:31.477656', 'step': 22003, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.509598', 'step': 22003, 'epoch': 3} {'type': 'loss', 'content': 0.07274700701236725, 'timestamp': '2025-09-30 22:39:31.544509', 'step': 22004, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:31.579696', 'step': 22004, 'epoch': 3} {'type': 'loss', 'content': 0.10953256487846375, 'timestamp': '2025-09-30 22:39:31.584906', 'step': 22005, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.616320', 'step': 22005, 'epoch': 3} {'type': 'loss', 'content': 0.06551413983106613, 'timestamp': '2025-09-30 22:39:31.620064', 'step': 22006, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.660340', 'step': 22006, 'epoch': 3} {'type': 'loss', 'content': 0.07208321243524551, 'timestamp': '2025-09-30 22:39:31.664726', 'step': 22007, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.696585', 'step': 22007, 'epoch': 3} {'type': 'loss', 'content': 0.10235492885112762, 'timestamp': '2025-09-30 22:39:31.734973', 'step': 22008, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.767154', 'step': 22008, 'epoch': 3} {'type': 'loss', 'content': 0.06151074171066284, 'timestamp': '2025-09-30 22:39:31.779070', 'step': 22009, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:31.811053', 'step': 22009, 'epoch': 3} {'type': 'loss', 'content': 0.0017722630873322487, 'timestamp': '2025-09-30 22:39:31.815231', 'step': 22010, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:31.848156', 'step': 22010, 'epoch': 3} {'type': 'loss', 'content': 0.05552378296852112, 'timestamp': '2025-09-30 22:39:31.852464', 'step': 22011, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:31.885201', 'step': 22011, 'epoch': 3} {'type': 'loss', 'content': 0.04312707483768463, 'timestamp': '2025-09-30 22:39:31.910041', 'step': 22012, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:31.941011', 'step': 22012, 'epoch': 3} {'type': 'loss', 'content': 0.0773625522851944, 'timestamp': '2025-09-30 22:39:31.945246', 'step': 22013, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:31.977190', 'step': 22013, 'epoch': 3} {'type': 'loss', 'content': 0.013210532255470753, 'timestamp': '2025-09-30 22:39:31.981061', 'step': 22014, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.012541', 'step': 22014, 'epoch': 3} {'type': 'loss', 'content': 0.09681869298219681, 'timestamp': '2025-09-30 22:39:32.017060', 'step': 22015, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:32.049837', 'step': 22015, 'epoch': 3} {'type': 'loss', 'content': 0.09738586843013763, 'timestamp': '2025-09-30 22:39:32.084374', 'step': 22016, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.117825', 'step': 22016, 'epoch': 3} {'type': 'loss', 'content': 0.06405537575483322, 'timestamp': '2025-09-30 22:39:32.133395', 'step': 22017, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:32.174776', 'step': 22017, 'epoch': 3} {'type': 'loss', 'content': 0.02892671525478363, 'timestamp': '2025-09-30 22:39:32.189761', 'step': 22018, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:32.224384', 'step': 22018, 'epoch': 3} {'type': 'loss', 'content': 0.04740992560982704, 'timestamp': '2025-09-30 22:39:32.230363', 'step': 22019, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:32.265360', 'step': 22019, 'epoch': 3} {'type': 'loss', 'content': 0.029034124687314034, 'timestamp': '2025-09-30 22:39:32.293434', 'step': 22020, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.326703', 'step': 22020, 'epoch': 3} {'type': 'loss', 'content': 0.1044459417462349, 'timestamp': '2025-09-30 22:39:32.331012', 'step': 22021, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:32.363329', 'step': 22021, 'epoch': 3} {'type': 'loss', 'content': 0.08404012769460678, 'timestamp': '2025-09-30 22:39:32.377874', 'step': 22022, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.417156', 'step': 22022, 'epoch': 3} {'type': 'loss', 'content': 0.06677795201539993, 'timestamp': '2025-09-30 22:39:32.421641', 'step': 22023, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:32.453168', 'step': 22023, 'epoch': 3} {'type': 'loss', 'content': 0.13531577587127686, 'timestamp': '2025-09-30 22:39:32.478165', 'step': 22024, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.519641', 'step': 22024, 'epoch': 3} {'type': 'loss', 'content': 0.06610184162855148, 'timestamp': '2025-09-30 22:39:32.531860', 'step': 22025, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:32.571510', 'step': 22025, 'epoch': 3} {'type': 'loss', 'content': 0.08069606125354767, 'timestamp': '2025-09-30 22:39:32.576547', 'step': 22026, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:32.609939', 'step': 22026, 'epoch': 3} {'type': 'loss', 'content': 0.009750214405357838, 'timestamp': '2025-09-30 22:39:32.615410', 'step': 22027, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:32.648489', 'step': 22027, 'epoch': 3} {'type': 'loss', 'content': 0.042058076709508896, 'timestamp': '2025-09-30 22:39:32.674398', 'step': 22028, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:32.706780', 'step': 22028, 'epoch': 3} {'type': 'loss', 'content': 0.027722882106900215, 'timestamp': '2025-09-30 22:39:32.711529', 'step': 22029, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:32.743428', 'step': 22029, 'epoch': 3} {'type': 'loss', 'content': 0.038916971534490585, 'timestamp': '2025-09-30 22:39:32.747128', 'step': 22030, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:32.778905', 'step': 22030, 'epoch': 3} {'type': 'loss', 'content': 0.06163987144827843, 'timestamp': '2025-09-30 22:39:32.782908', 'step': 22031, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:32.817477', 'step': 22031, 'epoch': 3} {'type': 'loss', 'content': 0.11235670745372772, 'timestamp': '2025-09-30 22:39:32.852124', 'step': 22032, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.883929', 'step': 22032, 'epoch': 3} {'type': 'loss', 'content': 0.07659601420164108, 'timestamp': '2025-09-30 22:39:32.887992', 'step': 22033, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.920258', 'step': 22033, 'epoch': 3} {'type': 'loss', 'content': 0.04263443127274513, 'timestamp': '2025-09-30 22:39:32.926642', 'step': 22034, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:32.958572', 'step': 22034, 'epoch': 3} {'type': 'loss', 'content': 0.10139329731464386, 'timestamp': '2025-09-30 22:39:32.963370', 'step': 22035, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:32.996436', 'step': 22035, 'epoch': 3} {'type': 'loss', 'content': 0.011539710685610771, 'timestamp': '2025-09-30 22:39:33.023764', 'step': 22036, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.067945', 'step': 22036, 'epoch': 3} {'type': 'loss', 'content': 0.07208460569381714, 'timestamp': '2025-09-30 22:39:33.074114', 'step': 22037, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.108156', 'step': 22037, 'epoch': 3} {'type': 'loss', 'content': 0.10415084660053253, 'timestamp': '2025-09-30 22:39:33.119018', 'step': 22038, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:33.153742', 'step': 22038, 'epoch': 3} {'type': 'loss', 'content': 0.010961130261421204, 'timestamp': '2025-09-30 22:39:33.157575', 'step': 22039, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.189223', 'step': 22039, 'epoch': 3} {'type': 'loss', 'content': 0.08506064116954803, 'timestamp': '2025-09-30 22:39:33.216860', 'step': 22040, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:33.257496', 'step': 22040, 'epoch': 3} {'type': 'loss', 'content': 0.14573568105697632, 'timestamp': '2025-09-30 22:39:33.261901', 'step': 22041, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.295923', 'step': 22041, 'epoch': 3} {'type': 'loss', 'content': 0.0314730629324913, 'timestamp': '2025-09-30 22:39:33.309652', 'step': 22042, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.343307', 'step': 22042, 'epoch': 3} {'type': 'loss', 'content': 0.06404091417789459, 'timestamp': '2025-09-30 22:39:33.346691', 'step': 22043, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.378361', 'step': 22043, 'epoch': 3} {'type': 'loss', 'content': 0.03875460848212242, 'timestamp': '2025-09-30 22:39:33.404354', 'step': 22044, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.437755', 'step': 22044, 'epoch': 3} {'type': 'loss', 'content': 0.02890123799443245, 'timestamp': '2025-09-30 22:39:33.443751', 'step': 22045, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.478373', 'step': 22045, 'epoch': 3} {'type': 'loss', 'content': 0.05632622539997101, 'timestamp': '2025-09-30 22:39:33.483865', 'step': 22046, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.519769', 'step': 22046, 'epoch': 3} {'type': 'loss', 'content': 0.022722678259015083, 'timestamp': '2025-09-30 22:39:33.523639', 'step': 22047, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.555311', 'step': 22047, 'epoch': 3} {'type': 'loss', 'content': 0.059151846915483475, 'timestamp': '2025-09-30 22:39:33.581136', 'step': 22048, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:33.612887', 'step': 22048, 'epoch': 3} {'type': 'loss', 'content': 0.026177173480391502, 'timestamp': '2025-09-30 22:39:33.618267', 'step': 22049, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.650658', 'step': 22049, 'epoch': 3} {'type': 'loss', 'content': 0.058686092495918274, 'timestamp': '2025-09-30 22:39:33.655708', 'step': 22050, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.689039', 'step': 22050, 'epoch': 3} {'type': 'loss', 'content': 0.04565344750881195, 'timestamp': '2025-09-30 22:39:33.693869', 'step': 22051, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:33.728108', 'step': 22051, 'epoch': 3} {'type': 'loss', 'content': 0.0372091606259346, 'timestamp': '2025-09-30 22:39:33.754844', 'step': 22052, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:33.787051', 'step': 22052, 'epoch': 3} {'type': 'loss', 'content': 0.0697086900472641, 'timestamp': '2025-09-30 22:39:33.793921', 'step': 22053, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.826444', 'step': 22053, 'epoch': 3} {'type': 'loss', 'content': 0.036037057638168335, 'timestamp': '2025-09-30 22:39:33.835665', 'step': 22054, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:33.874083', 'step': 22054, 'epoch': 3} {'type': 'loss', 'content': 0.02562810480594635, 'timestamp': '2025-09-30 22:39:33.879151', 'step': 22055, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:33.923757', 'step': 22055, 'epoch': 3} {'type': 'loss', 'content': 0.07561398297548294, 'timestamp': '2025-09-30 22:39:33.949474', 'step': 22056, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:33.982064', 'step': 22056, 'epoch': 3} {'type': 'loss', 'content': 0.03411554917693138, 'timestamp': '2025-09-30 22:39:33.995614', 'step': 22057, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:34.028993', 'step': 22057, 'epoch': 3} {'type': 'loss', 'content': 0.02955816686153412, 'timestamp': '2025-09-30 22:39:34.034766', 'step': 22058, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:34.065149', 'step': 22058, 'epoch': 3} {'type': 'loss', 'content': 0.024925097823143005, 'timestamp': '2025-09-30 22:39:34.068741', 'step': 22059, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.107086', 'step': 22059, 'epoch': 3} {'type': 'loss', 'content': 0.04817207530140877, 'timestamp': '2025-09-30 22:39:34.132833', 'step': 22060, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:34.164713', 'step': 22060, 'epoch': 3} {'type': 'loss', 'content': 0.03833043947815895, 'timestamp': '2025-09-30 22:39:34.168067', 'step': 22061, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.214104', 'step': 22061, 'epoch': 3} {'type': 'loss', 'content': 0.018652036786079407, 'timestamp': '2025-09-30 22:39:34.218676', 'step': 22062, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:34.250374', 'step': 22062, 'epoch': 3} {'type': 'loss', 'content': 0.07652650028467178, 'timestamp': '2025-09-30 22:39:34.254600', 'step': 22063, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:34.287154', 'step': 22063, 'epoch': 3} {'type': 'loss', 'content': 0.10007574409246445, 'timestamp': '2025-09-30 22:39:34.312218', 'step': 22064, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:34.358154', 'step': 22064, 'epoch': 3} {'type': 'loss', 'content': 0.014776877127587795, 'timestamp': '2025-09-30 22:39:34.370560', 'step': 22065, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:34.418329', 'step': 22065, 'epoch': 3} {'type': 'loss', 'content': 0.04070928692817688, 'timestamp': '2025-09-30 22:39:34.422717', 'step': 22066, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:34.455373', 'step': 22066, 'epoch': 3} {'type': 'loss', 'content': 0.037852875888347626, 'timestamp': '2025-09-30 22:39:34.469625', 'step': 22067, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.514801', 'step': 22067, 'epoch': 3} {'type': 'loss', 'content': 0.0480794794857502, 'timestamp': '2025-09-30 22:39:34.541507', 'step': 22068, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:34.574072', 'step': 22068, 'epoch': 3} {'type': 'loss', 'content': 0.09048504382371902, 'timestamp': '2025-09-30 22:39:34.594601', 'step': 22069, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.635852', 'step': 22069, 'epoch': 3} {'type': 'loss', 'content': 0.04636164382100105, 'timestamp': '2025-09-30 22:39:34.651206', 'step': 22070, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.682425', 'step': 22070, 'epoch': 3} {'type': 'loss', 'content': 0.06318353861570358, 'timestamp': '2025-09-30 22:39:34.695457', 'step': 22071, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:34.738712', 'step': 22071, 'epoch': 3} {'type': 'loss', 'content': 0.08389703929424286, 'timestamp': '2025-09-30 22:39:34.770597', 'step': 22072, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.800494', 'step': 22072, 'epoch': 3} {'type': 'loss', 'content': 0.0649511069059372, 'timestamp': '2025-09-30 22:39:34.805456', 'step': 22073, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:34.838830', 'step': 22073, 'epoch': 3} {'type': 'loss', 'content': 0.03738880902528763, 'timestamp': '2025-09-30 22:39:34.843507', 'step': 22074, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:34.877882', 'step': 22074, 'epoch': 3} {'type': 'loss', 'content': 0.04823748394846916, 'timestamp': '2025-09-30 22:39:34.888853', 'step': 22075, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:34.923897', 'step': 22075, 'epoch': 3} {'type': 'loss', 'content': 0.05843309685587883, 'timestamp': '2025-09-30 22:39:34.948387', 'step': 22076, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:34.981348', 'step': 22076, 'epoch': 3} {'type': 'loss', 'content': 0.03396565839648247, 'timestamp': '2025-09-30 22:39:34.986948', 'step': 22077, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:39:35.019805', 'step': 22077, 'epoch': 3} {'type': 'loss', 'content': 0.05680141597986221, 'timestamp': '2025-09-30 22:39:35.024393', 'step': 22078, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:35.055454', 'step': 22078, 'epoch': 3} {'type': 'loss', 'content': 0.13770762085914612, 'timestamp': '2025-09-30 22:39:35.061927', 'step': 22079, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:35.106683', 'step': 22079, 'epoch': 3} {'type': 'loss', 'content': 0.006622949615120888, 'timestamp': '2025-09-30 22:39:35.133928', 'step': 22080, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:35.166930', 'step': 22080, 'epoch': 3} {'type': 'loss', 'content': 0.10766327381134033, 'timestamp': '2025-09-30 22:39:35.171700', 'step': 22081, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:35.203667', 'step': 22081, 'epoch': 3} {'type': 'loss', 'content': 0.06845641136169434, 'timestamp': '2025-09-30 22:39:35.206481', 'step': 22082, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:35.239474', 'step': 22082, 'epoch': 3} {'type': 'loss', 'content': 0.08732255548238754, 'timestamp': '2025-09-30 22:39:35.243278', 'step': 22083, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:35.276694', 'step': 22083, 'epoch': 3} {'type': 'loss', 'content': 0.07602310925722122, 'timestamp': '2025-09-30 22:39:35.306751', 'step': 22084, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:35.341180', 'step': 22084, 'epoch': 3} {'type': 'loss', 'content': 0.06840033829212189, 'timestamp': '2025-09-30 22:39:35.348223', 'step': 22085, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:35.384727', 'step': 22085, 'epoch': 3} {'type': 'loss', 'content': 0.04476862773299217, 'timestamp': '2025-09-30 22:39:35.387973', 'step': 22086, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:35.420728', 'step': 22086, 'epoch': 3} {'type': 'loss', 'content': 0.09774492681026459, 'timestamp': '2025-09-30 22:39:35.429564', 'step': 22087, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:35.466094', 'step': 22087, 'epoch': 3} {'type': 'loss', 'content': 0.04287232831120491, 'timestamp': '2025-09-30 22:39:35.501940', 'step': 22088, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:35.537168', 'step': 22088, 'epoch': 3} {'type': 'loss', 'content': 0.07095465809106827, 'timestamp': '2025-09-30 22:39:35.541042', 'step': 22089, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:35.572393', 'step': 22089, 'epoch': 3} {'type': 'loss', 'content': 0.04636925458908081, 'timestamp': '2025-09-30 22:39:35.578871', 'step': 22090, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:35.611278', 'step': 22090, 'epoch': 3} {'type': 'loss', 'content': 0.05333694815635681, 'timestamp': '2025-09-30 22:39:35.616708', 'step': 22091, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:35.661413', 'step': 22091, 'epoch': 3} {'type': 'loss', 'content': 0.04698924347758293, 'timestamp': '2025-09-30 22:39:35.687374', 'step': 22092, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:35.725261', 'step': 22092, 'epoch': 3} {'type': 'loss', 'content': 0.021909819915890694, 'timestamp': '2025-09-30 22:39:35.734757', 'step': 22093, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:35.767264', 'step': 22093, 'epoch': 3} {'type': 'loss', 'content': 0.04291319102048874, 'timestamp': '2025-09-30 22:39:35.773212', 'step': 22094, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:35.806868', 'step': 22094, 'epoch': 3} {'type': 'loss', 'content': 0.05803842470049858, 'timestamp': '2025-09-30 22:39:35.823059', 'step': 22095, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:35.857776', 'step': 22095, 'epoch': 3} {'type': 'loss', 'content': 0.08240574598312378, 'timestamp': '2025-09-30 22:39:35.885325', 'step': 22096, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:35.928314', 'step': 22096, 'epoch': 3} {'type': 'loss', 'content': 0.12302935123443604, 'timestamp': '2025-09-30 22:39:35.939626', 'step': 22097, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:35.976337', 'step': 22097, 'epoch': 3} {'type': 'loss', 'content': 0.08978342264890671, 'timestamp': '2025-09-30 22:39:35.979725', 'step': 22098, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:36.014230', 'step': 22098, 'epoch': 3} {'type': 'loss', 'content': 0.03158291429281235, 'timestamp': '2025-09-30 22:39:36.018228', 'step': 22099, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.052475', 'step': 22099, 'epoch': 3} {'type': 'loss', 'content': 0.05473583564162254, 'timestamp': '2025-09-30 22:39:36.078436', 'step': 22100, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:36.110395', 'step': 22100, 'epoch': 3} {'type': 'loss', 'content': 0.10908537358045578, 'timestamp': '2025-09-30 22:39:36.114745', 'step': 22101, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:36.146397', 'step': 22101, 'epoch': 3} {'type': 'loss', 'content': 0.07368969172239304, 'timestamp': '2025-09-30 22:39:36.157578', 'step': 22102, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:36.202395', 'step': 22102, 'epoch': 3} {'type': 'loss', 'content': 0.04998309910297394, 'timestamp': '2025-09-30 22:39:36.205433', 'step': 22103, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:36.236606', 'step': 22103, 'epoch': 3} {'type': 'loss', 'content': 0.11386745423078537, 'timestamp': '2025-09-30 22:39:36.262598', 'step': 22104, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.293038', 'step': 22104, 'epoch': 3} {'type': 'loss', 'content': 0.11943825334310532, 'timestamp': '2025-09-30 22:39:36.296290', 'step': 22105, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.327120', 'step': 22105, 'epoch': 3} {'type': 'loss', 'content': 0.044099707156419754, 'timestamp': '2025-09-30 22:39:36.333721', 'step': 22106, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.367645', 'step': 22106, 'epoch': 3} {'type': 'loss', 'content': 0.0877738744020462, 'timestamp': '2025-09-30 22:39:36.373267', 'step': 22107, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.417643', 'step': 22107, 'epoch': 3} {'type': 'loss', 'content': 0.05273011699318886, 'timestamp': '2025-09-30 22:39:36.442941', 'step': 22108, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:36.475588', 'step': 22108, 'epoch': 3} {'type': 'loss', 'content': 0.07181108742952347, 'timestamp': '2025-09-30 22:39:36.483037', 'step': 22109, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:36.514158', 'step': 22109, 'epoch': 3} {'type': 'loss', 'content': 0.07343358546495438, 'timestamp': '2025-09-30 22:39:36.518402', 'step': 22110, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.551540', 'step': 22110, 'epoch': 3} {'type': 'loss', 'content': 0.06571545451879501, 'timestamp': '2025-09-30 22:39:36.559213', 'step': 22111, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.595755', 'step': 22111, 'epoch': 3} {'type': 'loss', 'content': 0.093711718916893, 'timestamp': '2025-09-30 22:39:36.634397', 'step': 22112, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:36.689174', 'step': 22112, 'epoch': 3} {'type': 'loss', 'content': 0.09994641691446304, 'timestamp': '2025-09-30 22:39:36.699672', 'step': 22113, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.741643', 'step': 22113, 'epoch': 3} {'type': 'loss', 'content': 0.0879170373082161, 'timestamp': '2025-09-30 22:39:36.744299', 'step': 22114, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:36.776098', 'step': 22114, 'epoch': 3} {'type': 'loss', 'content': 0.11122194677591324, 'timestamp': '2025-09-30 22:39:36.779221', 'step': 22115, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:36.810973', 'step': 22115, 'epoch': 3} {'type': 'loss', 'content': 0.05359237268567085, 'timestamp': '2025-09-30 22:39:36.840166', 'step': 22116, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:36.872081', 'step': 22116, 'epoch': 3} {'type': 'loss', 'content': 0.08219214528799057, 'timestamp': '2025-09-30 22:39:36.883920', 'step': 22117, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:36.917367', 'step': 22117, 'epoch': 3} {'type': 'loss', 'content': 0.02748136594891548, 'timestamp': '2025-09-30 22:39:36.920671', 'step': 22118, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:36.952588', 'step': 22118, 'epoch': 3} {'type': 'loss', 'content': 0.03480370342731476, 'timestamp': '2025-09-30 22:39:36.968233', 'step': 22119, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.008689', 'step': 22119, 'epoch': 3} {'type': 'loss', 'content': 0.05230960249900818, 'timestamp': '2025-09-30 22:39:37.035309', 'step': 22120, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:37.082900', 'step': 22120, 'epoch': 3} {'type': 'loss', 'content': 0.04587755724787712, 'timestamp': '2025-09-30 22:39:37.086944', 'step': 22121, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:37.120273', 'step': 22121, 'epoch': 3} {'type': 'loss', 'content': 0.08546919375658035, 'timestamp': '2025-09-30 22:39:37.126022', 'step': 22122, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.173322', 'step': 22122, 'epoch': 3} {'type': 'loss', 'content': 0.14061479270458221, 'timestamp': '2025-09-30 22:39:37.177933', 'step': 22123, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:37.212405', 'step': 22123, 'epoch': 3} {'type': 'loss', 'content': 0.05918741971254349, 'timestamp': '2025-09-30 22:39:37.239978', 'step': 22124, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:37.279068', 'step': 22124, 'epoch': 3} {'type': 'loss', 'content': 0.05527763068675995, 'timestamp': '2025-09-30 22:39:37.289189', 'step': 22125, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:37.328447', 'step': 22125, 'epoch': 3} {'type': 'loss', 'content': 0.02062932588160038, 'timestamp': '2025-09-30 22:39:37.335695', 'step': 22126, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.367988', 'step': 22126, 'epoch': 3} {'type': 'loss', 'content': 0.10053158551454544, 'timestamp': '2025-09-30 22:39:37.371355', 'step': 22127, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.413422', 'step': 22127, 'epoch': 3} {'type': 'loss', 'content': 0.05988345295190811, 'timestamp': '2025-09-30 22:39:37.439014', 'step': 22128, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:37.471896', 'step': 22128, 'epoch': 3} {'type': 'loss', 'content': 0.0460347943007946, 'timestamp': '2025-09-30 22:39:37.488278', 'step': 22129, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:37.524731', 'step': 22129, 'epoch': 3} {'type': 'loss', 'content': 0.05496639758348465, 'timestamp': '2025-09-30 22:39:37.531331', 'step': 22130, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.566159', 'step': 22130, 'epoch': 3} {'type': 'loss', 'content': 0.024355260655283928, 'timestamp': '2025-09-30 22:39:37.572948', 'step': 22131, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:37.619923', 'step': 22131, 'epoch': 3} {'type': 'loss', 'content': 0.044064901769161224, 'timestamp': '2025-09-30 22:39:37.644554', 'step': 22132, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.675327', 'step': 22132, 'epoch': 3} {'type': 'loss', 'content': 0.08453058451414108, 'timestamp': '2025-09-30 22:39:37.680653', 'step': 22133, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:37.712854', 'step': 22133, 'epoch': 3} {'type': 'loss', 'content': 0.11286675930023193, 'timestamp': '2025-09-30 22:39:37.718550', 'step': 22134, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.752580', 'step': 22134, 'epoch': 3} {'type': 'loss', 'content': 0.05545752868056297, 'timestamp': '2025-09-30 22:39:37.756011', 'step': 22135, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.786855', 'step': 22135, 'epoch': 3} {'type': 'loss', 'content': 0.04975922778248787, 'timestamp': '2025-09-30 22:39:37.811952', 'step': 22136, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:37.847053', 'step': 22136, 'epoch': 3} {'type': 'loss', 'content': 0.0829901397228241, 'timestamp': '2025-09-30 22:39:37.858040', 'step': 22137, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:37.895173', 'step': 22137, 'epoch': 3} {'type': 'loss', 'content': 0.0759357437491417, 'timestamp': '2025-09-30 22:39:37.900477', 'step': 22138, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:37.933077', 'step': 22138, 'epoch': 3} {'type': 'loss', 'content': 0.038760870695114136, 'timestamp': '2025-09-30 22:39:37.938888', 'step': 22139, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:37.975571', 'step': 22139, 'epoch': 3} {'type': 'loss', 'content': 0.09141167998313904, 'timestamp': '2025-09-30 22:39:38.004524', 'step': 22140, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.037102', 'step': 22140, 'epoch': 3} {'type': 'loss', 'content': 0.0547807440161705, 'timestamp': '2025-09-30 22:39:38.042403', 'step': 22141, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:38.077439', 'step': 22141, 'epoch': 3} {'type': 'loss', 'content': 0.07180614024400711, 'timestamp': '2025-09-30 22:39:38.081564', 'step': 22142, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:38.114791', 'step': 22142, 'epoch': 3} {'type': 'loss', 'content': 0.06160036474466324, 'timestamp': '2025-09-30 22:39:38.124598', 'step': 22143, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:38.162876', 'step': 22143, 'epoch': 3} {'type': 'loss', 'content': 0.057058461010456085, 'timestamp': '2025-09-30 22:39:38.189317', 'step': 22144, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:38.221872', 'step': 22144, 'epoch': 3} {'type': 'loss', 'content': 0.08966804295778275, 'timestamp': '2025-09-30 22:39:38.225050', 'step': 22145, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:38.256164', 'step': 22145, 'epoch': 3} {'type': 'loss', 'content': 0.08904791623353958, 'timestamp': '2025-09-30 22:39:38.261069', 'step': 22146, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:38.295057', 'step': 22146, 'epoch': 3} {'type': 'loss', 'content': 0.04129057750105858, 'timestamp': '2025-09-30 22:39:38.301491', 'step': 22147, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.332450', 'step': 22147, 'epoch': 3} {'type': 'loss', 'content': 0.024301137775182724, 'timestamp': '2025-09-30 22:39:38.358083', 'step': 22148, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:38.393533', 'step': 22148, 'epoch': 3} {'type': 'loss', 'content': 0.0569203682243824, 'timestamp': '2025-09-30 22:39:38.404299', 'step': 22149, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:38.442292', 'step': 22149, 'epoch': 3} {'type': 'loss', 'content': 0.07282360643148422, 'timestamp': '2025-09-30 22:39:38.447379', 'step': 22150, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.493763', 'step': 22150, 'epoch': 3} {'type': 'loss', 'content': 0.08964724838733673, 'timestamp': '2025-09-30 22:39:38.500268', 'step': 22151, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.543233', 'step': 22151, 'epoch': 3} {'type': 'loss', 'content': 0.01271080132573843, 'timestamp': '2025-09-30 22:39:38.575384', 'step': 22152, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.618082', 'step': 22152, 'epoch': 3} {'type': 'loss', 'content': 0.02603541500866413, 'timestamp': '2025-09-30 22:39:38.630042', 'step': 22153, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.664458', 'step': 22153, 'epoch': 3} {'type': 'loss', 'content': 0.07707668095827103, 'timestamp': '2025-09-30 22:39:38.669841', 'step': 22154, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.701982', 'step': 22154, 'epoch': 3} {'type': 'loss', 'content': 0.009608506225049496, 'timestamp': '2025-09-30 22:39:38.713028', 'step': 22155, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:38.750838', 'step': 22155, 'epoch': 3} {'type': 'loss', 'content': 0.02483534999191761, 'timestamp': '2025-09-30 22:39:38.775735', 'step': 22156, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.811198', 'step': 22156, 'epoch': 3} {'type': 'loss', 'content': 0.04467594996094704, 'timestamp': '2025-09-30 22:39:38.816981', 'step': 22157, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:38.848888', 'step': 22157, 'epoch': 3} {'type': 'loss', 'content': 0.029285604134202003, 'timestamp': '2025-09-30 22:39:38.853138', 'step': 22158, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:38.893343', 'step': 22158, 'epoch': 3} {'type': 'loss', 'content': 0.12739723920822144, 'timestamp': '2025-09-30 22:39:38.896443', 'step': 22159, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:38.927104', 'step': 22159, 'epoch': 3} {'type': 'loss', 'content': 0.11651063710451126, 'timestamp': '2025-09-30 22:39:38.952338', 'step': 22160, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:38.989176', 'step': 22160, 'epoch': 3} {'type': 'loss', 'content': 0.06607198715209961, 'timestamp': '2025-09-30 22:39:38.997366', 'step': 22161, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.037391', 'step': 22161, 'epoch': 3} {'type': 'loss', 'content': 0.052010830491781235, 'timestamp': '2025-09-30 22:39:39.041290', 'step': 22162, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.086394', 'step': 22162, 'epoch': 3} {'type': 'loss', 'content': 0.12755349278450012, 'timestamp': '2025-09-30 22:39:39.090052', 'step': 22163, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.122456', 'step': 22163, 'epoch': 3} {'type': 'loss', 'content': 0.02860482782125473, 'timestamp': '2025-09-30 22:39:39.147677', 'step': 22164, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.179918', 'step': 22164, 'epoch': 3} {'type': 'loss', 'content': 0.08332505822181702, 'timestamp': '2025-09-30 22:39:39.182785', 'step': 22165, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.212748', 'step': 22165, 'epoch': 3} {'type': 'loss', 'content': 0.10021034628152847, 'timestamp': '2025-09-30 22:39:39.216656', 'step': 22166, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.250715', 'step': 22166, 'epoch': 3} {'type': 'loss', 'content': 0.02315623126924038, 'timestamp': '2025-09-30 22:39:39.254093', 'step': 22167, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.286172', 'step': 22167, 'epoch': 3} {'type': 'loss', 'content': 0.11342333257198334, 'timestamp': '2025-09-30 22:39:39.310730', 'step': 22168, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:39.350166', 'step': 22168, 'epoch': 3} {'type': 'loss', 'content': 0.05990080535411835, 'timestamp': '2025-09-30 22:39:39.354598', 'step': 22169, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.384747', 'step': 22169, 'epoch': 3} {'type': 'loss', 'content': 0.06277836114168167, 'timestamp': '2025-09-30 22:39:39.399509', 'step': 22170, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.434525', 'step': 22170, 'epoch': 3} {'type': 'loss', 'content': 0.018702112138271332, 'timestamp': '2025-09-30 22:39:39.438540', 'step': 22171, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.481574', 'step': 22171, 'epoch': 3} {'type': 'loss', 'content': 0.0950784757733345, 'timestamp': '2025-09-30 22:39:39.507598', 'step': 22172, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.539936', 'step': 22172, 'epoch': 3} {'type': 'loss', 'content': 0.11724231392145157, 'timestamp': '2025-09-30 22:39:39.542826', 'step': 22173, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:39.573170', 'step': 22173, 'epoch': 3} {'type': 'loss', 'content': 0.07581232488155365, 'timestamp': '2025-09-30 22:39:39.589192', 'step': 22174, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.621471', 'step': 22174, 'epoch': 3} {'type': 'loss', 'content': 0.04800713062286377, 'timestamp': '2025-09-30 22:39:39.624393', 'step': 22175, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.656203', 'step': 22175, 'epoch': 3} {'type': 'loss', 'content': 0.07326000183820724, 'timestamp': '2025-09-30 22:39:39.680746', 'step': 22176, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:39.711419', 'step': 22176, 'epoch': 3} {'type': 'loss', 'content': 0.025230634957551956, 'timestamp': '2025-09-30 22:39:39.713899', 'step': 22177, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.756120', 'step': 22177, 'epoch': 3} {'type': 'loss', 'content': 0.07996070384979248, 'timestamp': '2025-09-30 22:39:39.765858', 'step': 22178, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:39.796755', 'step': 22178, 'epoch': 3} {'type': 'loss', 'content': 0.11112252622842789, 'timestamp': '2025-09-30 22:39:39.801396', 'step': 22179, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.842885', 'step': 22179, 'epoch': 3} {'type': 'loss', 'content': 0.116075798869133, 'timestamp': '2025-09-30 22:39:39.868307', 'step': 22180, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:39.905461', 'step': 22180, 'epoch': 3} {'type': 'loss', 'content': 0.1152387410402298, 'timestamp': '2025-09-30 22:39:39.909233', 'step': 22181, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:39.941102', 'step': 22181, 'epoch': 3} {'type': 'loss', 'content': 0.01516725029796362, 'timestamp': '2025-09-30 22:39:39.946933', 'step': 22182, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:39.992670', 'step': 22182, 'epoch': 3} {'type': 'loss', 'content': 0.04092814400792122, 'timestamp': '2025-09-30 22:39:39.999825', 'step': 22183, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.029574', 'step': 22183, 'epoch': 3} {'type': 'loss', 'content': 0.061047907918691635, 'timestamp': '2025-09-30 22:39:40.055049', 'step': 22184, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.085775', 'step': 22184, 'epoch': 3} {'type': 'loss', 'content': 0.08393599838018417, 'timestamp': '2025-09-30 22:39:40.090357', 'step': 22185, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:40.123043', 'step': 22185, 'epoch': 3} {'type': 'loss', 'content': 0.0742654800415039, 'timestamp': '2025-09-30 22:39:40.133711', 'step': 22186, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.166645', 'step': 22186, 'epoch': 3} {'type': 'loss', 'content': 0.02007804811000824, 'timestamp': '2025-09-30 22:39:40.170508', 'step': 22187, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.213602', 'step': 22187, 'epoch': 3} {'type': 'loss', 'content': 0.08634241670370102, 'timestamp': '2025-09-30 22:39:40.240767', 'step': 22188, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.274372', 'step': 22188, 'epoch': 3} {'type': 'loss', 'content': 0.054697323590517044, 'timestamp': '2025-09-30 22:39:40.294818', 'step': 22189, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:40.325836', 'step': 22189, 'epoch': 3} {'type': 'loss', 'content': 0.046767763793468475, 'timestamp': '2025-09-30 22:39:40.334356', 'step': 22190, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:40.365774', 'step': 22190, 'epoch': 3} {'type': 'loss', 'content': 0.13656733930110931, 'timestamp': '2025-09-30 22:39:40.373693', 'step': 22191, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:40.404440', 'step': 22191, 'epoch': 3} {'type': 'loss', 'content': 0.0653301551938057, 'timestamp': '2025-09-30 22:39:40.429897', 'step': 22192, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:40.460881', 'step': 22192, 'epoch': 3} {'type': 'loss', 'content': 0.06846538186073303, 'timestamp': '2025-09-30 22:39:40.473865', 'step': 22193, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:40.514311', 'step': 22193, 'epoch': 3} {'type': 'loss', 'content': 0.03009001724421978, 'timestamp': '2025-09-30 22:39:40.521640', 'step': 22194, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.566059', 'step': 22194, 'epoch': 3} {'type': 'loss', 'content': 0.026943325996398926, 'timestamp': '2025-09-30 22:39:40.575834', 'step': 22195, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:40.613472', 'step': 22195, 'epoch': 3} {'type': 'loss', 'content': 0.1171344444155693, 'timestamp': '2025-09-30 22:39:40.640942', 'step': 22196, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.671590', 'step': 22196, 'epoch': 3} {'type': 'loss', 'content': 0.014726492576301098, 'timestamp': '2025-09-30 22:39:40.675738', 'step': 22197, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:40.715869', 'step': 22197, 'epoch': 3} {'type': 'loss', 'content': 0.09594728797674179, 'timestamp': '2025-09-30 22:39:40.719253', 'step': 22198, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:40.750503', 'step': 22198, 'epoch': 3} {'type': 'loss', 'content': 0.03219545632600784, 'timestamp': '2025-09-30 22:39:40.754816', 'step': 22199, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:40.785838', 'step': 22199, 'epoch': 3} {'type': 'loss', 'content': 0.025405509397387505, 'timestamp': '2025-09-30 22:39:40.812136', 'step': 22200, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.844162', 'step': 22200, 'epoch': 3} {'type': 'loss', 'content': 0.11572854220867157, 'timestamp': '2025-09-30 22:39:40.847691', 'step': 22201, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:40.882434', 'step': 22201, 'epoch': 3} {'type': 'loss', 'content': 0.046072568744421005, 'timestamp': '2025-09-30 22:39:40.886523', 'step': 22202, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.917383', 'step': 22202, 'epoch': 3} {'type': 'loss', 'content': 0.06265979260206223, 'timestamp': '2025-09-30 22:39:40.922053', 'step': 22203, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:40.954265', 'step': 22203, 'epoch': 3} {'type': 'loss', 'content': 0.0029342041816562414, 'timestamp': '2025-09-30 22:39:40.980508', 'step': 22204, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:41.012569', 'step': 22204, 'epoch': 3} {'type': 'loss', 'content': 0.020274192094802856, 'timestamp': '2025-09-30 22:39:41.017129', 'step': 22205, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:41.053483', 'step': 22205, 'epoch': 3} {'type': 'loss', 'content': 0.06533029675483704, 'timestamp': '2025-09-30 22:39:41.068156', 'step': 22206, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:41.108791', 'step': 22206, 'epoch': 3} {'type': 'loss', 'content': 0.07753336429595947, 'timestamp': '2025-09-30 22:39:41.113876', 'step': 22207, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.146217', 'step': 22207, 'epoch': 3} {'type': 'loss', 'content': 0.05289841815829277, 'timestamp': '2025-09-30 22:39:41.172037', 'step': 22208, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:41.202674', 'step': 22208, 'epoch': 3} {'type': 'loss', 'content': 0.08409510552883148, 'timestamp': '2025-09-30 22:39:41.213106', 'step': 22209, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.253021', 'step': 22209, 'epoch': 3} {'type': 'loss', 'content': 0.05044200271368027, 'timestamp': '2025-09-30 22:39:41.259007', 'step': 22210, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:41.292107', 'step': 22210, 'epoch': 3} {'type': 'loss', 'content': 0.06935473531484604, 'timestamp': '2025-09-30 22:39:41.299113', 'step': 22211, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.330764', 'step': 22211, 'epoch': 3} {'type': 'loss', 'content': 0.026156166568398476, 'timestamp': '2025-09-30 22:39:41.357732', 'step': 22212, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.389436', 'step': 22212, 'epoch': 3} {'type': 'loss', 'content': 0.07178791612386703, 'timestamp': '2025-09-30 22:39:41.403474', 'step': 22213, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.434713', 'step': 22213, 'epoch': 3} {'type': 'loss', 'content': 0.03565209358930588, 'timestamp': '2025-09-30 22:39:41.440949', 'step': 22214, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.481361', 'step': 22214, 'epoch': 3} {'type': 'loss', 'content': 0.03210357949137688, 'timestamp': '2025-09-30 22:39:41.483718', 'step': 22215, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.515801', 'step': 22215, 'epoch': 3} {'type': 'loss', 'content': 0.07265258580446243, 'timestamp': '2025-09-30 22:39:41.546770', 'step': 22216, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:41.578599', 'step': 22216, 'epoch': 3} {'type': 'loss', 'content': 0.058086175471544266, 'timestamp': '2025-09-30 22:39:41.581327', 'step': 22217, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.611842', 'step': 22217, 'epoch': 3} {'type': 'loss', 'content': 0.10555977374315262, 'timestamp': '2025-09-30 22:39:41.616094', 'step': 22218, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.649182', 'step': 22218, 'epoch': 3} {'type': 'loss', 'content': 0.030442968010902405, 'timestamp': '2025-09-30 22:39:41.652806', 'step': 22219, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:41.685366', 'step': 22219, 'epoch': 3} {'type': 'loss', 'content': 0.01989857107400894, 'timestamp': '2025-09-30 22:39:41.718010', 'step': 22220, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:41.748635', 'step': 22220, 'epoch': 3} {'type': 'loss', 'content': 0.08369248360395432, 'timestamp': '2025-09-30 22:39:41.751920', 'step': 22221, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.783645', 'step': 22221, 'epoch': 3} {'type': 'loss', 'content': 0.09323848783969879, 'timestamp': '2025-09-30 22:39:41.788793', 'step': 22222, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:41.820665', 'step': 22222, 'epoch': 3} {'type': 'loss', 'content': 0.059138618409633636, 'timestamp': '2025-09-30 22:39:41.826025', 'step': 22223, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:41.857856', 'step': 22223, 'epoch': 3} {'type': 'loss', 'content': 0.004106023348867893, 'timestamp': '2025-09-30 22:39:41.886618', 'step': 22224, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:41.918110', 'step': 22224, 'epoch': 3} {'type': 'loss', 'content': 0.023504065349698067, 'timestamp': '2025-09-30 22:39:41.920580', 'step': 22225, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:41.951350', 'step': 22225, 'epoch': 3} {'type': 'loss', 'content': 0.08629459887742996, 'timestamp': '2025-09-30 22:39:41.956139', 'step': 22226, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:41.999859', 'step': 22226, 'epoch': 3} {'type': 'loss', 'content': 0.03785763308405876, 'timestamp': '2025-09-30 22:39:42.003929', 'step': 22227, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:42.035281', 'step': 22227, 'epoch': 3} {'type': 'loss', 'content': 0.056053727865219116, 'timestamp': '2025-09-30 22:39:42.060720', 'step': 22228, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.090978', 'step': 22228, 'epoch': 3} {'type': 'loss', 'content': 0.06648664176464081, 'timestamp': '2025-09-30 22:39:42.095008', 'step': 22229, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:42.127108', 'step': 22229, 'epoch': 3} {'type': 'loss', 'content': 0.07100261002779007, 'timestamp': '2025-09-30 22:39:42.129619', 'step': 22230, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:42.180038', 'step': 22230, 'epoch': 3} {'type': 'loss', 'content': 0.0674889087677002, 'timestamp': '2025-09-30 22:39:42.183971', 'step': 22231, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.215386', 'step': 22231, 'epoch': 3} {'type': 'loss', 'content': 0.08813454955816269, 'timestamp': '2025-09-30 22:39:42.240860', 'step': 22232, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:42.273330', 'step': 22232, 'epoch': 3} {'type': 'loss', 'content': 0.06933144479990005, 'timestamp': '2025-09-30 22:39:42.278406', 'step': 22233, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:42.310654', 'step': 22233, 'epoch': 3} {'type': 'loss', 'content': 0.073953777551651, 'timestamp': '2025-09-30 22:39:42.321087', 'step': 22234, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:42.357898', 'step': 22234, 'epoch': 3} {'type': 'loss', 'content': 0.07658133655786514, 'timestamp': '2025-09-30 22:39:42.361028', 'step': 22235, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:42.394531', 'step': 22235, 'epoch': 3} {'type': 'loss', 'content': 0.04533347487449646, 'timestamp': '2025-09-30 22:39:42.423727', 'step': 22236, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:42.455140', 'step': 22236, 'epoch': 3} {'type': 'loss', 'content': 0.0637628361582756, 'timestamp': '2025-09-30 22:39:42.460178', 'step': 22237, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:42.492375', 'step': 22237, 'epoch': 3} {'type': 'loss', 'content': 0.07344107329845428, 'timestamp': '2025-09-30 22:39:42.498482', 'step': 22238, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.529872', 'step': 22238, 'epoch': 3} {'type': 'loss', 'content': 0.05190896987915039, 'timestamp': '2025-09-30 22:39:42.533828', 'step': 22239, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.568079', 'step': 22239, 'epoch': 3} {'type': 'loss', 'content': 0.07793079316616058, 'timestamp': '2025-09-30 22:39:42.591636', 'step': 22240, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.625423', 'step': 22240, 'epoch': 3} {'type': 'loss', 'content': 0.13995134830474854, 'timestamp': '2025-09-30 22:39:42.630520', 'step': 22241, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:42.663913', 'step': 22241, 'epoch': 3} {'type': 'loss', 'content': 0.07389745861291885, 'timestamp': '2025-09-30 22:39:42.668791', 'step': 22242, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.701827', 'step': 22242, 'epoch': 3} {'type': 'loss', 'content': 0.05312606319785118, 'timestamp': '2025-09-30 22:39:42.707001', 'step': 22243, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:42.738062', 'step': 22243, 'epoch': 3} {'type': 'loss', 'content': 0.04270577058196068, 'timestamp': '2025-09-30 22:39:42.763324', 'step': 22244, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:42.795102', 'step': 22244, 'epoch': 3} {'type': 'loss', 'content': 0.06897531449794769, 'timestamp': '2025-09-30 22:39:42.798267', 'step': 22245, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:42.843569', 'step': 22245, 'epoch': 3} {'type': 'loss', 'content': 0.05067873001098633, 'timestamp': '2025-09-30 22:39:42.849720', 'step': 22246, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:42.881105', 'step': 22246, 'epoch': 3} {'type': 'loss', 'content': 0.0632166713476181, 'timestamp': '2025-09-30 22:39:42.883602', 'step': 22247, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.920620', 'step': 22247, 'epoch': 3} {'type': 'loss', 'content': 0.07249347865581512, 'timestamp': '2025-09-30 22:39:42.945109', 'step': 22248, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:42.975243', 'step': 22248, 'epoch': 3} {'type': 'loss', 'content': 0.04962299019098282, 'timestamp': '2025-09-30 22:39:42.979244', 'step': 22249, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.012474', 'step': 22249, 'epoch': 3} {'type': 'loss', 'content': 0.13076290488243103, 'timestamp': '2025-09-30 22:39:43.016830', 'step': 22250, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.048371', 'step': 22250, 'epoch': 3} {'type': 'loss', 'content': 0.030961383134126663, 'timestamp': '2025-09-30 22:39:43.052152', 'step': 22251, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.083932', 'step': 22251, 'epoch': 3} {'type': 'loss', 'content': 0.07545053958892822, 'timestamp': '2025-09-30 22:39:43.110708', 'step': 22252, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.145615', 'step': 22252, 'epoch': 3} {'type': 'loss', 'content': 0.04979952052235603, 'timestamp': '2025-09-30 22:39:43.148959', 'step': 22253, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.184688', 'step': 22253, 'epoch': 3} {'type': 'loss', 'content': 0.020264284685254097, 'timestamp': '2025-09-30 22:39:43.188906', 'step': 22254, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.221792', 'step': 22254, 'epoch': 3} {'type': 'loss', 'content': 0.08563292771577835, 'timestamp': '2025-09-30 22:39:43.226060', 'step': 22255, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.257005', 'step': 22255, 'epoch': 3} {'type': 'loss', 'content': 0.044402241706848145, 'timestamp': '2025-09-30 22:39:43.283153', 'step': 22256, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.314997', 'step': 22256, 'epoch': 3} {'type': 'loss', 'content': 0.06620708107948303, 'timestamp': '2025-09-30 22:39:43.318566', 'step': 22257, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.350528', 'step': 22257, 'epoch': 3} {'type': 'loss', 'content': 0.01437850296497345, 'timestamp': '2025-09-30 22:39:43.354020', 'step': 22258, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:43.385410', 'step': 22258, 'epoch': 3} {'type': 'loss', 'content': 0.05067045986652374, 'timestamp': '2025-09-30 22:39:43.393452', 'step': 22259, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.429991', 'step': 22259, 'epoch': 3} {'type': 'loss', 'content': 0.09522674977779388, 'timestamp': '2025-09-30 22:39:43.454735', 'step': 22260, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:43.485177', 'step': 22260, 'epoch': 3} {'type': 'loss', 'content': 0.07346489280462265, 'timestamp': '2025-09-30 22:39:43.489545', 'step': 22261, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.523307', 'step': 22261, 'epoch': 3} {'type': 'loss', 'content': 0.0565665140748024, 'timestamp': '2025-09-30 22:39:43.527588', 'step': 22262, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.559347', 'step': 22262, 'epoch': 3} {'type': 'loss', 'content': 0.025889495387673378, 'timestamp': '2025-09-30 22:39:43.563887', 'step': 22263, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:43.595635', 'step': 22263, 'epoch': 3} {'type': 'loss', 'content': 0.07522985339164734, 'timestamp': '2025-09-30 22:39:43.621107', 'step': 22264, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.652636', 'step': 22264, 'epoch': 3} {'type': 'loss', 'content': 0.02505454607307911, 'timestamp': '2025-09-30 22:39:43.656454', 'step': 22265, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.687830', 'step': 22265, 'epoch': 3} {'type': 'loss', 'content': 0.05436467379331589, 'timestamp': '2025-09-30 22:39:43.691344', 'step': 22266, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.723185', 'step': 22266, 'epoch': 3} {'type': 'loss', 'content': 0.03827236220240593, 'timestamp': '2025-09-30 22:39:43.726784', 'step': 22267, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.760773', 'step': 22267, 'epoch': 3} {'type': 'loss', 'content': 0.07166118174791336, 'timestamp': '2025-09-30 22:39:43.787363', 'step': 22268, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:43.818787', 'step': 22268, 'epoch': 3} {'type': 'loss', 'content': 0.017673704773187637, 'timestamp': '2025-09-30 22:39:43.822991', 'step': 22269, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:43.857144', 'step': 22269, 'epoch': 3} {'type': 'loss', 'content': 0.03609379753470421, 'timestamp': '2025-09-30 22:39:43.861825', 'step': 22270, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:43.895170', 'step': 22270, 'epoch': 3} {'type': 'loss', 'content': 0.0734405368566513, 'timestamp': '2025-09-30 22:39:43.901521', 'step': 22271, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:43.934732', 'step': 22271, 'epoch': 3} {'type': 'loss', 'content': 0.09683314710855484, 'timestamp': '2025-09-30 22:39:43.960124', 'step': 22272, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.000276', 'step': 22272, 'epoch': 3} {'type': 'loss', 'content': 0.03905968740582466, 'timestamp': '2025-09-30 22:39:44.003826', 'step': 22273, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.036799', 'step': 22273, 'epoch': 3} {'type': 'loss', 'content': 0.04807251691818237, 'timestamp': '2025-09-30 22:39:44.040428', 'step': 22274, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.074079', 'step': 22274, 'epoch': 3} {'type': 'loss', 'content': 0.028076335787773132, 'timestamp': '2025-09-30 22:39:44.087157', 'step': 22275, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:44.126270', 'step': 22275, 'epoch': 3} {'type': 'loss', 'content': 0.08021846413612366, 'timestamp': '2025-09-30 22:39:44.153028', 'step': 22276, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.185788', 'step': 22276, 'epoch': 3} {'type': 'loss', 'content': 0.14152741432189941, 'timestamp': '2025-09-30 22:39:44.190014', 'step': 22277, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:44.226166', 'step': 22277, 'epoch': 3} {'type': 'loss', 'content': 0.043190788477659225, 'timestamp': '2025-09-30 22:39:44.235807', 'step': 22278, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.268562', 'step': 22278, 'epoch': 3} {'type': 'loss', 'content': 0.079323410987854, 'timestamp': '2025-09-30 22:39:44.273578', 'step': 22279, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:44.315726', 'step': 22279, 'epoch': 3} {'type': 'loss', 'content': 0.031399525701999664, 'timestamp': '2025-09-30 22:39:44.341788', 'step': 22280, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:44.373862', 'step': 22280, 'epoch': 3} {'type': 'loss', 'content': 0.12138630449771881, 'timestamp': '2025-09-30 22:39:44.381466', 'step': 22281, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.418770', 'step': 22281, 'epoch': 3} {'type': 'loss', 'content': 0.07546402513980865, 'timestamp': '2025-09-30 22:39:44.424885', 'step': 22282, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.457292', 'step': 22282, 'epoch': 3} {'type': 'loss', 'content': 0.007093813270330429, 'timestamp': '2025-09-30 22:39:44.461773', 'step': 22283, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.494056', 'step': 22283, 'epoch': 3} {'type': 'loss', 'content': 0.12838046252727509, 'timestamp': '2025-09-30 22:39:44.528690', 'step': 22284, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.565694', 'step': 22284, 'epoch': 3} {'type': 'loss', 'content': 0.12066806107759476, 'timestamp': '2025-09-30 22:39:44.570009', 'step': 22285, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.605089', 'step': 22285, 'epoch': 3} {'type': 'loss', 'content': 0.023644475266337395, 'timestamp': '2025-09-30 22:39:44.620038', 'step': 22286, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:44.659586', 'step': 22286, 'epoch': 3} {'type': 'loss', 'content': 0.004633552394807339, 'timestamp': '2025-09-30 22:39:44.663017', 'step': 22287, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:44.712172', 'step': 22287, 'epoch': 3} {'type': 'loss', 'content': 0.04886366426944733, 'timestamp': '2025-09-30 22:39:44.749725', 'step': 22288, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.794937', 'step': 22288, 'epoch': 3} {'type': 'loss', 'content': 0.057628776878118515, 'timestamp': '2025-09-30 22:39:44.808083', 'step': 22289, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.849317', 'step': 22289, 'epoch': 3} {'type': 'loss', 'content': 0.08446066081523895, 'timestamp': '2025-09-30 22:39:44.852988', 'step': 22290, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 176], 'flops': 5220903722048}, 'timestamp': '2025-09-30 22:39:44.885779', 'step': 22290, 'epoch': 3} {'type': 'loss', 'content': 0.02919176034629345, 'timestamp': '2025-09-30 22:39:44.891781', 'step': 22291, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.926184', 'step': 22291, 'epoch': 3} {'type': 'loss', 'content': 0.10331065952777863, 'timestamp': '2025-09-30 22:39:44.951724', 'step': 22292, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:44.983940', 'step': 22292, 'epoch': 3} {'type': 'loss', 'content': 0.09021927416324615, 'timestamp': '2025-09-30 22:39:44.987066', 'step': 22293, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:45.018448', 'step': 22293, 'epoch': 3} {'type': 'loss', 'content': 0.040354158729314804, 'timestamp': '2025-09-30 22:39:45.024151', 'step': 22294, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.059721', 'step': 22294, 'epoch': 3} {'type': 'loss', 'content': 0.07686178386211395, 'timestamp': '2025-09-30 22:39:45.064153', 'step': 22295, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.096786', 'step': 22295, 'epoch': 3} {'type': 'loss', 'content': 0.08806408941745758, 'timestamp': '2025-09-30 22:39:45.122148', 'step': 22296, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.155284', 'step': 22296, 'epoch': 3} {'type': 'loss', 'content': 0.046659767627716064, 'timestamp': '2025-09-30 22:39:45.158365', 'step': 22297, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.197284', 'step': 22297, 'epoch': 3} {'type': 'loss', 'content': 0.0256987065076828, 'timestamp': '2025-09-30 22:39:45.207920', 'step': 22298, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.244230', 'step': 22298, 'epoch': 3} {'type': 'loss', 'content': 0.013647496700286865, 'timestamp': '2025-09-30 22:39:45.247566', 'step': 22299, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.278880', 'step': 22299, 'epoch': 3} {'type': 'loss', 'content': 0.050296854227781296, 'timestamp': '2025-09-30 22:39:45.306346', 'step': 22300, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.339875', 'step': 22300, 'epoch': 3} {'type': 'loss', 'content': 0.07847492396831512, 'timestamp': '2025-09-30 22:39:45.353401', 'step': 22301, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:45.385787', 'step': 22301, 'epoch': 3} {'type': 'loss', 'content': 0.03800805285573006, 'timestamp': '2025-09-30 22:39:45.389599', 'step': 22302, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.428626', 'step': 22302, 'epoch': 3} {'type': 'loss', 'content': 0.06270767748355865, 'timestamp': '2025-09-30 22:39:45.432696', 'step': 22303, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.467457', 'step': 22303, 'epoch': 3} {'type': 'loss', 'content': 0.06887694448232651, 'timestamp': '2025-09-30 22:39:45.497302', 'step': 22304, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.529097', 'step': 22304, 'epoch': 3} {'type': 'loss', 'content': 0.08266334980726242, 'timestamp': '2025-09-30 22:39:45.536376', 'step': 22305, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.567317', 'step': 22305, 'epoch': 3} {'type': 'loss', 'content': 0.017045944929122925, 'timestamp': '2025-09-30 22:39:45.571704', 'step': 22306, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.608099', 'step': 22306, 'epoch': 3} {'type': 'loss', 'content': 0.003804827108979225, 'timestamp': '2025-09-30 22:39:45.612860', 'step': 22307, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.644583', 'step': 22307, 'epoch': 3} {'type': 'loss', 'content': 0.045263007283210754, 'timestamp': '2025-09-30 22:39:45.671373', 'step': 22308, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.704327', 'step': 22308, 'epoch': 3} {'type': 'loss', 'content': 0.07064726948738098, 'timestamp': '2025-09-30 22:39:45.709282', 'step': 22309, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.742103', 'step': 22309, 'epoch': 3} {'type': 'loss', 'content': 0.09778439253568649, 'timestamp': '2025-09-30 22:39:45.748998', 'step': 22310, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.783302', 'step': 22310, 'epoch': 3} {'type': 'loss', 'content': 0.07018741220235825, 'timestamp': '2025-09-30 22:39:45.790102', 'step': 22311, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:45.822518', 'step': 22311, 'epoch': 3} {'type': 'loss', 'content': 0.10810019075870514, 'timestamp': '2025-09-30 22:39:45.849196', 'step': 22312, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:45.882065', 'step': 22312, 'epoch': 3} {'type': 'loss', 'content': 0.05724664404988289, 'timestamp': '2025-09-30 22:39:45.887410', 'step': 22313, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.920517', 'step': 22313, 'epoch': 3} {'type': 'loss', 'content': 0.061941683292388916, 'timestamp': '2025-09-30 22:39:45.936764', 'step': 22314, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:45.975069', 'step': 22314, 'epoch': 3} {'type': 'loss', 'content': 0.05179276689887047, 'timestamp': '2025-09-30 22:39:45.984531', 'step': 22315, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.023227', 'step': 22315, 'epoch': 3} {'type': 'loss', 'content': 0.08289450407028198, 'timestamp': '2025-09-30 22:39:46.048724', 'step': 22316, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:46.081170', 'step': 22316, 'epoch': 3} {'type': 'loss', 'content': 0.03666723519563675, 'timestamp': '2025-09-30 22:39:46.084682', 'step': 22317, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:46.118770', 'step': 22317, 'epoch': 3} {'type': 'loss', 'content': 0.05149657279253006, 'timestamp': '2025-09-30 22:39:46.124078', 'step': 22318, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.157735', 'step': 22318, 'epoch': 3} {'type': 'loss', 'content': 0.04186288267374039, 'timestamp': '2025-09-30 22:39:46.163562', 'step': 22319, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.196422', 'step': 22319, 'epoch': 3} {'type': 'loss', 'content': 0.043573301285505295, 'timestamp': '2025-09-30 22:39:46.232017', 'step': 22320, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:46.272262', 'step': 22320, 'epoch': 3} {'type': 'loss', 'content': 0.09167075157165527, 'timestamp': '2025-09-30 22:39:46.283216', 'step': 22321, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.315050', 'step': 22321, 'epoch': 3} {'type': 'loss', 'content': 0.05697236210107803, 'timestamp': '2025-09-30 22:39:46.321131', 'step': 22322, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:46.352915', 'step': 22322, 'epoch': 3} {'type': 'loss', 'content': 0.07858923077583313, 'timestamp': '2025-09-30 22:39:46.359927', 'step': 22323, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.396042', 'step': 22323, 'epoch': 3} {'type': 'loss', 'content': 0.05740895867347717, 'timestamp': '2025-09-30 22:39:46.433448', 'step': 22324, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:46.467994', 'step': 22324, 'epoch': 3} {'type': 'loss', 'content': 0.021673157811164856, 'timestamp': '2025-09-30 22:39:46.471709', 'step': 22325, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:46.509187', 'step': 22325, 'epoch': 3} {'type': 'loss', 'content': 0.07223894447088242, 'timestamp': '2025-09-30 22:39:46.517702', 'step': 22326, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.554669', 'step': 22326, 'epoch': 3} {'type': 'loss', 'content': 0.060640398412942886, 'timestamp': '2025-09-30 22:39:46.559877', 'step': 22327, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.593545', 'step': 22327, 'epoch': 3} {'type': 'loss', 'content': 0.15894661843776703, 'timestamp': '2025-09-30 22:39:46.620082', 'step': 22328, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:46.651086', 'step': 22328, 'epoch': 3} {'type': 'loss', 'content': 0.12230845540761948, 'timestamp': '2025-09-30 22:39:46.656164', 'step': 22329, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.687809', 'step': 22329, 'epoch': 3} {'type': 'loss', 'content': 0.023139292374253273, 'timestamp': '2025-09-30 22:39:46.694736', 'step': 22330, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.727158', 'step': 22330, 'epoch': 3} {'type': 'loss', 'content': 0.01066521555185318, 'timestamp': '2025-09-30 22:39:46.740329', 'step': 22331, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:46.771186', 'step': 22331, 'epoch': 3} {'type': 'loss', 'content': 0.050381850451231, 'timestamp': '2025-09-30 22:39:46.796608', 'step': 22332, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:46.834269', 'step': 22332, 'epoch': 3} {'type': 'loss', 'content': 0.057975590229034424, 'timestamp': '2025-09-30 22:39:46.839415', 'step': 22333, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:46.872961', 'step': 22333, 'epoch': 3} {'type': 'loss', 'content': 0.03693520277738571, 'timestamp': '2025-09-30 22:39:46.877895', 'step': 22334, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:46.911552', 'step': 22334, 'epoch': 3} {'type': 'loss', 'content': 0.06601761281490326, 'timestamp': '2025-09-30 22:39:46.921790', 'step': 22335, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:46.961922', 'step': 22335, 'epoch': 3} {'type': 'loss', 'content': 0.014757279306650162, 'timestamp': '2025-09-30 22:39:46.986791', 'step': 22336, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:47.021370', 'step': 22336, 'epoch': 3} {'type': 'loss', 'content': 0.07913615554571152, 'timestamp': '2025-09-30 22:39:47.025061', 'step': 22337, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:47.059110', 'step': 22337, 'epoch': 3} {'type': 'loss', 'content': 0.022552672773599625, 'timestamp': '2025-09-30 22:39:47.063282', 'step': 22338, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.096207', 'step': 22338, 'epoch': 3} {'type': 'loss', 'content': 0.13487112522125244, 'timestamp': '2025-09-30 22:39:47.102382', 'step': 22339, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:47.135535', 'step': 22339, 'epoch': 3} {'type': 'loss', 'content': 0.05362286418676376, 'timestamp': '2025-09-30 22:39:47.162715', 'step': 22340, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:47.194952', 'step': 22340, 'epoch': 3} {'type': 'loss', 'content': 0.012478589080274105, 'timestamp': '2025-09-30 22:39:47.198432', 'step': 22341, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.234983', 'step': 22341, 'epoch': 3} {'type': 'loss', 'content': 0.07825452089309692, 'timestamp': '2025-09-30 22:39:47.238346', 'step': 22342, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.272370', 'step': 22342, 'epoch': 3} {'type': 'loss', 'content': 0.06615957617759705, 'timestamp': '2025-09-30 22:39:47.275809', 'step': 22343, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.314009', 'step': 22343, 'epoch': 3} {'type': 'loss', 'content': 0.0291295126080513, 'timestamp': '2025-09-30 22:39:47.339180', 'step': 22344, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.379635', 'step': 22344, 'epoch': 3} {'type': 'loss', 'content': 0.09415967762470245, 'timestamp': '2025-09-30 22:39:47.383766', 'step': 22345, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.415964', 'step': 22345, 'epoch': 3} {'type': 'loss', 'content': 0.018422642722725868, 'timestamp': '2025-09-30 22:39:47.420743', 'step': 22346, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:47.465493', 'step': 22346, 'epoch': 3} {'type': 'loss', 'content': 0.04322034493088722, 'timestamp': '2025-09-30 22:39:47.476691', 'step': 22347, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:47.515605', 'step': 22347, 'epoch': 3} {'type': 'loss', 'content': 0.031178874894976616, 'timestamp': '2025-09-30 22:39:47.542052', 'step': 22348, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:47.575933', 'step': 22348, 'epoch': 3} {'type': 'loss', 'content': 0.07988948374986649, 'timestamp': '2025-09-30 22:39:47.585683', 'step': 22349, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.625831', 'step': 22349, 'epoch': 3} {'type': 'loss', 'content': 0.0506645068526268, 'timestamp': '2025-09-30 22:39:47.629080', 'step': 22350, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:47.670017', 'step': 22350, 'epoch': 3} {'type': 'loss', 'content': 0.10558147728443146, 'timestamp': '2025-09-30 22:39:47.681715', 'step': 22351, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:47.715018', 'step': 22351, 'epoch': 3} {'type': 'loss', 'content': 0.12087372690439224, 'timestamp': '2025-09-30 22:39:47.742952', 'step': 22352, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:47.776905', 'step': 22352, 'epoch': 3} {'type': 'loss', 'content': 0.051157861948013306, 'timestamp': '2025-09-30 22:39:47.794450', 'step': 22353, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 144], 'flops': 4271696270016}, 'timestamp': '2025-09-30 22:39:47.827866', 'step': 22353, 'epoch': 3} {'type': 'loss', 'content': 0.03380284830927849, 'timestamp': '2025-09-30 22:39:47.832783', 'step': 22354, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.865339', 'step': 22354, 'epoch': 3} {'type': 'loss', 'content': 0.05419301986694336, 'timestamp': '2025-09-30 22:39:47.868714', 'step': 22355, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.900807', 'step': 22355, 'epoch': 3} {'type': 'loss', 'content': 0.06443499028682709, 'timestamp': '2025-09-30 22:39:47.926611', 'step': 22356, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:47.960341', 'step': 22356, 'epoch': 3} {'type': 'loss', 'content': 0.06751878559589386, 'timestamp': '2025-09-30 22:39:47.965353', 'step': 22357, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:47.998511', 'step': 22357, 'epoch': 3} {'type': 'loss', 'content': 0.007442628499120474, 'timestamp': '2025-09-30 22:39:48.014627', 'step': 22358, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:48.060273', 'step': 22358, 'epoch': 3} {'type': 'loss', 'content': 0.06994534283876419, 'timestamp': '2025-09-30 22:39:48.062734', 'step': 22359, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:48.094258', 'step': 22359, 'epoch': 3} {'type': 'loss', 'content': 0.06651853024959564, 'timestamp': '2025-09-30 22:39:48.121171', 'step': 22360, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:48.157217', 'step': 22360, 'epoch': 3} {'type': 'loss', 'content': 0.044632166624069214, 'timestamp': '2025-09-30 22:39:48.159972', 'step': 22361, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:48.192347', 'step': 22361, 'epoch': 3} {'type': 'loss', 'content': 0.06314843893051147, 'timestamp': '2025-09-30 22:39:48.198530', 'step': 22362, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 192], 'flops': 5695507448064}, 'timestamp': '2025-09-30 22:39:48.231098', 'step': 22362, 'epoch': 3} {'type': 'loss', 'content': 0.08382204920053482, 'timestamp': '2025-09-30 22:39:48.242436', 'step': 22363, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:48.284664', 'step': 22363, 'epoch': 3} {'type': 'loss', 'content': 0.032833199948072433, 'timestamp': '2025-09-30 22:39:48.315623', 'step': 22364, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:48.352825', 'step': 22364, 'epoch': 3} {'type': 'loss', 'content': 0.03787967562675476, 'timestamp': '2025-09-30 22:39:48.358238', 'step': 22365, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:48.391725', 'step': 22365, 'epoch': 3} {'type': 'loss', 'content': 0.03925856575369835, 'timestamp': '2025-09-30 22:39:48.395254', 'step': 22366, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:48.428165', 'step': 22366, 'epoch': 3} {'type': 'loss', 'content': 0.15880466997623444, 'timestamp': '2025-09-30 22:39:48.432506', 'step': 22367, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:48.465766', 'step': 22367, 'epoch': 3} {'type': 'loss', 'content': 0.09147130697965622, 'timestamp': '2025-09-30 22:39:48.494360', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:39:56.552189', 'step': 22368, 'epoch': 3} {'type': 'pplx', 'content': 8452.099670198637, 'timestamp': '2025-09-30 22:39:56.568959', 'step': 22368, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 160], 'flops': 4746299996032}, 'timestamp': '2025-09-30 22:39:56.606662', 'step': 22368, 'epoch': 3} {'type': 'loss', 'content': 0.09243287891149521, 'timestamp': '2025-09-30 22:39:56.610484', 'step': 22369, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:56.655706', 'step': 22369, 'epoch': 3} {'type': 'loss', 'content': 0.0535757839679718, 'timestamp': '2025-09-30 22:39:56.667653', 'step': 22370, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:56.710049', 'step': 22370, 'epoch': 3} {'type': 'loss', 'content': 0.05095786973834038, 'timestamp': '2025-09-30 22:39:56.714355', 'step': 22371, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:56.768367', 'step': 22371, 'epoch': 3} {'type': 'loss', 'content': 0.012209965847432613, 'timestamp': '2025-09-30 22:39:56.794237', 'step': 22372, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:56.825315', 'step': 22372, 'epoch': 3} {'type': 'loss', 'content': 0.04931655153632164, 'timestamp': '2025-09-30 22:39:56.829626', 'step': 22373, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:56.861644', 'step': 22373, 'epoch': 3} {'type': 'loss', 'content': 0.027601512148976326, 'timestamp': '2025-09-30 22:39:56.880378', 'step': 22374, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 80], 'flops': 2373281365952}, 'timestamp': '2025-09-30 22:39:56.915359', 'step': 22374, 'epoch': 3} {'type': 'loss', 'content': 0.12228304892778397, 'timestamp': '2025-09-30 22:39:56.918431', 'step': 22375, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 128], 'flops': 3797092544000}, 'timestamp': '2025-09-30 22:39:56.949333', 'step': 22375, 'epoch': 3} {'type': 'loss', 'content': 0.03703676164150238, 'timestamp': '2025-09-30 22:39:56.977016', 'step': 22376, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:57.014473', 'step': 22376, 'epoch': 3} {'type': 'loss', 'content': 0.0633322149515152, 'timestamp': '2025-09-30 22:39:57.024714', 'step': 22377, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 112], 'flops': 3322488817984}, 'timestamp': '2025-09-30 22:39:57.066317', 'step': 22377, 'epoch': 3} {'type': 'loss', 'content': 0.035431720316410065, 'timestamp': '2025-09-30 22:39:57.070750', 'step': 22378, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [4, 96], 'flops': 2847885091968}, 'timestamp': '2025-09-30 22:39:57.104823', 'step': 22378, 'epoch': 3} {'type': 'loss', 'content': 0.01511420402675867, 'timestamp': '2025-09-30 22:39:57.108873', 'step': 22379, 'epoch': 3} {'type': 'flops', 'content': {'type': 'train', 'batch_dim': [1, 208], 'flops': 1542724875376}, 'timestamp': '2025-09-30 22:39:57.142515', 'step': 22379, 'epoch': 3} {'type': 'loss', 'content': 0.00025051101692952216, 'timestamp': '2025-09-30 22:39:57.168712', 'step': 22380, 'epoch': 3} {'type': 'flops', 'content': [{'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 192], 'batch_size': 8, 'flops': 3796808916480}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 160], 'batch_size': 8, 'flops': 3164007441664}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 48], 'batch_size': 8, 'flops': 949202279808}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 128], 'batch_size': 8, 'flops': 2531205966848}, {'type': 'perplexity', 'in_batch_dim': [8, 144], 'batch_size': 8, 'flops': 2847606704256}, {'type': 'perplexity', 'in_batch_dim': [8, 112], 'batch_size': 8, 'flops': 2214805229440}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 64], 'batch_size': 8, 'flops': 1265603017216}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 96], 'batch_size': 8, 'flops': 1898404492032}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [8, 80], 'batch_size': 8, 'flops': 1582003754624}, {'type': 'perplexity', 'in_batch_dim': [3, 48], 'batch_size': 8, 'flops': 949202279808}], 'timestamp': '2025-09-30 22:40:05.147905', 'step': 22380, 'epoch': 3} {'type': 'pplx', 'content': 8695.394420522354, 'timestamp': '2025-09-30 22:40:05.152869', 'step': 22380, 'epoch': 3} {'type': 'best_pplx', 'content': 7091.001736438246, 'timestamp': '2025-09-30 22:40:05.156349', 'step': 22380, 'epoch': 3} {'type': 'best_step', 'content': 932, 'timestamp': '2025-09-30 22:40:05.164278', 'step': 22380, 'epoch': 3} {'type': 'total_pplx_flops', 'content': 50323539177094400, 'timestamp': '2025-09-30 22:40:05.169024', 'step': 22380, 'epoch': 3} {'type': 'total_train_flops', 'content': 76563139214162640, 'timestamp': '2025-09-30 22:40:05.171738', 'step': 22380, 'epoch': 3}